The generic dpif-netdev datapath was tightly coupled to the rte_flow offload implementation. It contained rte_flow-specific logic for managing offload threads, queues, and flow mark associations.
This commit decouples the two by moving all rte_flow-specific logic into the dpif-offload-rte_flow.c provider. The dpif-offload API is extended to support asynchronous flow operations (put/del) with callbacks, making dpif-netdev a generic client of the offload provider. This abstraction results in a cleaner separation of concerns, simplifying the dpif-netdev code and making the offload framework more modular and maintainable. Signed-off-by: Eelco Chaudron <[email protected]> --- v2: - Properly handle flow_mark change on resume callbacks - Add some more comments to the code - Destroy mpsc_queue on thread shutdown - Removed leading spaces in include file - Make dpif_offload_rte_get_netdev() offload argument const - Removed per offload thread time keeping --- lib/automake.mk | 1 + lib/dpif-netdev-private-flow.h | 5 - lib/dpif-netdev.c | 1040 +++++---------------------- lib/dpif-netdev.h | 12 - lib/dpif-offload-provider.h | 4 - lib/dpif-offload-rte_flow-private.h | 34 + lib/dpif-offload-rte_flow.c | 940 ++++++++++++++++++++++-- lib/dpif-offload-tc.c | 8 - lib/dpif-offload.c | 67 -- lib/dpif-offload.h | 1 - lib/netdev-offload-dpdk.c | 130 ++-- lib/netdev-offload-dpdk.h | 57 +- 12 files changed, 1201 insertions(+), 1098 deletions(-) create mode 100644 lib/dpif-offload-rte_flow-private.h diff --git a/lib/automake.mk b/lib/automake.mk index e9d449290..ba708da6a 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -508,6 +508,7 @@ if DPDK_NETDEV lib_libopenvswitch_la_SOURCES += \ lib/dpdk.c \ lib/dpif-offload-rte_flow.c \ + lib/dpif-offload-rte_flow-private.h \ lib/netdev-dpdk.c \ lib/netdev-offload-dpdk.c \ lib/netdev-offload-dpdk.h diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h index 7425dd44e..2d76c5e32 100644 --- a/lib/dpif-netdev-private-flow.h +++ b/lib/dpif-netdev-private-flow.h @@ -109,11 +109,6 @@ struct dp_netdev_flow { /* Statistics. */ struct dp_netdev_flow_stats stats; - /* Statistics and attributes received from the netdev offload provider. */ - atomic_int netdev_flow_get_result; - struct dp_netdev_flow_stats last_stats; - struct dp_netdev_flow_attrs last_attrs; - /* Actions. */ OVSRCU_TYPE(struct dp_netdev_actions *) actions; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a653a95ab..d9cbcc46d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -290,6 +290,10 @@ struct dp_netdev { struct ovs_mutex meters_lock; struct cmap meters OVS_GUARDED; + /* Flow Mark to flow mapping. */ + struct ovs_mutex mark_to_flow_lock; + struct cmap mark_to_flow OVS_GUARDED; + /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/ atomic_uint32_t emc_insert_min; /* Enable collection of PMD performance metrics. */ @@ -359,97 +363,6 @@ enum rxq_cycles_counter_type { RXQ_N_CYCLES }; -enum dp_offload_type { - DP_OFFLOAD_FLOW, - DP_OFFLOAD_FLUSH, -}; - -enum { - DP_NETDEV_FLOW_OFFLOAD_OP_ADD, - DP_NETDEV_FLOW_OFFLOAD_OP_MOD, - DP_NETDEV_FLOW_OFFLOAD_OP_DEL, -}; - -struct dp_offload_flow_item { - struct dp_netdev_flow *flow; - int op; - struct match match; - struct nlattr *actions; - size_t actions_len; - odp_port_t orig_in_port; /* Originating in_port for tnl flows. 
*/ -}; - -struct dp_offload_flush_item { - struct netdev *netdev; - struct ovs_barrier *barrier; -}; - -union dp_offload_thread_data { - struct dp_offload_flow_item flow; - struct dp_offload_flush_item flush; -}; - -struct dp_offload_thread_item { - struct mpsc_queue_node node; - enum dp_offload_type type; - long long int timestamp; - struct dp_netdev *dp; - union dp_offload_thread_data data[0]; -}; - -struct dp_offload_thread { - PADDED_MEMBERS(CACHE_LINE_SIZE, - struct mpsc_queue queue; - atomic_uint64_t enqueued_item; - struct cmap megaflow_to_mark; - struct cmap mark_to_flow; - struct mov_avg_cma cma; - struct mov_avg_ema ema; - ); -}; -static struct dp_offload_thread *dp_offload_threads; -static void *dp_netdev_flow_offload_main(void *arg); - -/* XXX: Temporarily forward declarations, will be removed during cleanup. */ -static unsigned int rte_flow_offload_ufid_to_thread_id(const ovs_u128 ufid); -static unsigned int rte_flow_offload_thread_init(void); -void rte_flow_offload_thread_set_thread_nb(unsigned int thread_nb); -unsigned int rte_flow_offload_thread_nb(void); -unsigned int rte_flow_offload_thread_id(void); - -/* XXX: Temporarily external declarations, will be removed during cleanup. */ -struct netdev *dpif_netdev_offload_get_netdev_by_port_id(odp_port_t); - -static void -dp_netdev_offload_init(void) -{ - static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - unsigned int nb_offload_thread = rte_flow_offload_thread_nb(); - unsigned int tid; - - if (!ovsthread_once_start(&once)) { - return; - } - - dp_offload_threads = xcalloc(nb_offload_thread, - sizeof *dp_offload_threads); - - for (tid = 0; tid < nb_offload_thread; tid++) { - struct dp_offload_thread *thread; - - thread = &dp_offload_threads[tid]; - mpsc_queue_init(&thread->queue); - cmap_init(&thread->megaflow_to_mark); - cmap_init(&thread->mark_to_flow); - atomic_init(&thread->enqueued_item, 0); - mov_avg_cma_init(&thread->cma); - mov_avg_ema_init(&thread->ema, 100); - ovs_thread_create("rte_offload", dp_netdev_flow_offload_main, thread); - } - - ovsthread_once_done(&once); -} - #define XPS_TIMEOUT 500000LL /* In microseconds. */ /* Contained by struct dp_netdev_port's 'rxqs' member. */ @@ -622,9 +535,6 @@ static void dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd, uint32_t bond_id) OVS_EXCLUDED(pmd->bond_mutex); -static void dp_netdev_offload_flush(struct dp_netdev *dp, - struct dp_netdev_port *port); - static void reconfigure_datapath(struct dp_netdev *dp) OVS_REQ_RDLOCK(dp->port_rwlock); static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd); @@ -662,8 +572,6 @@ dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, static void dp_netdev_request_reconfigure(struct dp_netdev *dp); static inline bool pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd); -static void queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, - struct dp_netdev_flow *flow); static void dp_netdev_simple_match_insert(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow) @@ -1907,6 +1815,10 @@ create_dp_netdev(const char *name, const struct dpif_class *class, cmap_init(&dp->meters); ovs_mutex_init(&dp->meters_lock); + /* Init flow mark resources. */ + cmap_init(&dp->mark_to_flow); + ovs_mutex_init(&dp->mark_to_flow_lock); + /* Disable upcalls by default. 
*/ dp_netdev_disable_upcall(dp); dp->upcall_aux = NULL; @@ -2049,6 +1961,9 @@ dp_netdev_free(struct dp_netdev *dp) cmap_destroy(&dp->tx_bonds); ovs_mutex_destroy(&dp->bond_mutex); + cmap_destroy(&dp->mark_to_flow); + ovs_mutex_destroy(&dp->mark_to_flow_lock); + /* Upcalls must be disabled at this point */ dp_netdev_destroy_upcall_lock(dp); @@ -2398,17 +2313,6 @@ do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) seq_change(dp->port_seq); reconfigure_datapath(dp); - - /* Flush and disable offloads only after 'port' has been made - * inaccessible through datapath reconfiguration. - * This prevents having PMDs enqueuing offload requests after - * the flush. - * When only this port is deleted instead of the whole datapath, - * revalidator threads are still active and can still enqueue - * offload modification or deletion. Managing those stray requests - * is done in the offload threads. */ - dp_netdev_offload_flush(dp, port); - port_destroy(port); } @@ -2507,116 +2411,19 @@ dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread *pmd, return cls; } -#define MAX_FLOW_MARK (UINT32_MAX - 1) -#define INVALID_FLOW_MARK 0 -/* Zero flow mark is used to indicate the HW to remove the mark. A packet - * marked with zero mark is received in SW without a mark at all, so it - * cannot be used as a valid mark. - */ - -struct megaflow_to_mark_data { - const struct cmap_node node; - ovs_u128 mega_ufid; - uint32_t mark; -}; - -static struct id_fpool *flow_mark_pool; - -static uint32_t -flow_mark_alloc(void) -{ - static struct ovsthread_once init_once = OVSTHREAD_ONCE_INITIALIZER; - unsigned int tid = rte_flow_offload_thread_id(); - uint32_t mark; - - if (ovsthread_once_start(&init_once)) { - /* Haven't initiated yet, do it here */ - flow_mark_pool = id_fpool_create(rte_flow_offload_thread_nb(), - 1, MAX_FLOW_MARK); - ovsthread_once_done(&init_once); - } - - if (id_fpool_new_id(flow_mark_pool, tid, &mark)) { - return mark; - } - - return INVALID_FLOW_MARK; -} - +/* Associate mark with a flow, which is 1:N mapping */ static void -flow_mark_free(uint32_t mark) +mark_to_flow_associate(struct dp_netdev *dp, const uint32_t mark, + struct dp_netdev_flow *flow) { - unsigned int tid = rte_flow_offload_thread_id(); - - id_fpool_free_id(flow_mark_pool, tid, mark); -} - -/* associate megaflow with a mark, which is a 1:1 mapping */ -static void -megaflow_to_mark_associate(const ovs_u128 *mega_ufid, uint32_t mark) -{ - size_t hash = dp_netdev_flow_hash(mega_ufid); - struct megaflow_to_mark_data *data = xzalloc(sizeof(*data)); - unsigned int tid = rte_flow_offload_thread_id(); - - data->mega_ufid = *mega_ufid; - data->mark = mark; - - cmap_insert(&dp_offload_threads[tid].megaflow_to_mark, - CONST_CAST(struct cmap_node *, &data->node), hash); -} - -/* disassociate meagaflow with a mark */ -static void -megaflow_to_mark_disassociate(const ovs_u128 *mega_ufid) -{ - size_t hash = dp_netdev_flow_hash(mega_ufid); - struct megaflow_to_mark_data *data; - unsigned int tid = rte_flow_offload_thread_id(); - - CMAP_FOR_EACH_WITH_HASH (data, node, hash, - &dp_offload_threads[tid].megaflow_to_mark) { - if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) { - cmap_remove(&dp_offload_threads[tid].megaflow_to_mark, - CONST_CAST(struct cmap_node *, &data->node), hash); - ovsrcu_postpone(free, data); - return; - } - } - - VLOG_WARN("Masked ufid "UUID_FMT" is not associated with a mark?\n", - UUID_ARGS((struct uuid *)mega_ufid)); -} - -static inline uint32_t -megaflow_to_mark_find(const ovs_u128 *mega_ufid) -{ - size_t hash = 
dp_netdev_flow_hash(mega_ufid); - struct megaflow_to_mark_data *data; - unsigned int tid = rte_flow_offload_thread_id(); - - CMAP_FOR_EACH_WITH_HASH (data, node, hash, - &dp_offload_threads[tid].megaflow_to_mark) { - if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) { - return data->mark; - } - } - - VLOG_DBG("Mark id for ufid "UUID_FMT" was not found\n", - UUID_ARGS((struct uuid *)mega_ufid)); - return INVALID_FLOW_MARK; -} - -/* associate mark with a flow, which is 1:N mapping */ -static void -mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow *flow) -{ - unsigned int tid = rte_flow_offload_thread_id(); dp_netdev_flow_ref(flow); - cmap_insert(&dp_offload_threads[tid].mark_to_flow, + ovs_mutex_lock(&dp->mark_to_flow_lock); + cmap_insert(&dp->mark_to_flow, CONST_CAST(struct cmap_node *, &flow->mark_node), hash_int(mark, 0)); + ovs_mutex_unlock(&dp->mark_to_flow_lock); + flow->mark = mark; VLOG_DBG("Associated dp_netdev flow %p with mark %u mega_ufid "UUID_FMT, @@ -2624,13 +2431,12 @@ mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow *flow) } static bool -flow_mark_has_no_ref(uint32_t mark) +flow_mark_has_no_ref(struct dp_netdev *dp, uint32_t mark) { - unsigned int tid = rte_flow_offload_thread_id(); struct dp_netdev_flow *flow; CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0), - &dp_offload_threads[tid].mark_to_flow) { + &dp->mark_to_flow) { if (flow->mark == mark) { return false; } @@ -2639,379 +2445,66 @@ flow_mark_has_no_ref(uint32_t mark) return true; } -static int -mark_to_flow_disassociate(struct dp_netdev *dp, - struct dp_netdev_flow *flow) +static void +mark_to_flow_disassociate(struct dp_netdev *dp, struct dp_netdev_flow *flow) { - struct cmap_node *mark_node = CONST_CAST(struct cmap_node *, - &flow->mark_node); - unsigned int tid = rte_flow_offload_thread_id(); uint32_t mark = flow->mark; - int ret = 0; - /* INVALID_FLOW_MARK may mean that the flow has been disassociated or - * never associated. */ if (OVS_UNLIKELY(mark == INVALID_FLOW_MARK)) { - return EINVAL; + return; } - cmap_remove(&dp_offload_threads[tid].mark_to_flow, - mark_node, hash_int(mark, 0)); flow->mark = INVALID_FLOW_MARK; - /* - * no flow is referencing the mark any more? If so, let's - * remove the flow from hardware and free the mark. - */ - if (flow_mark_has_no_ref(mark)) { - struct netdev *port; - odp_port_t in_port = flow->flow.in_port.odp_port; - - port = dpif_netdev_offload_get_netdev_by_port_id(in_port); - if (port) { - /* Taking a global 'port_rwlock' to fulfill thread safety - * restrictions regarding netdev port mapping. 
*/ - ovs_rwlock_rdlock(&dp->port_rwlock); - ret = netdev_offload_dpdk_flow_del(port, &flow->mega_ufid, NULL); - ovs_rwlock_unlock(&dp->port_rwlock); - } - - flow_mark_free(mark); - VLOG_DBG("Freed flow mark %u mega_ufid "UUID_FMT, mark, - UUID_ARGS((struct uuid *) &flow->mega_ufid)); + ovs_mutex_lock(&dp->mark_to_flow_lock); + cmap_remove(&dp->mark_to_flow, + CONST_CAST(struct cmap_node *, &flow->mark_node), + hash_int(mark, 0)); + ovs_mutex_unlock(&dp->mark_to_flow_lock); - megaflow_to_mark_disassociate(&flow->mega_ufid); - } dp_netdev_flow_unref(flow); - - return ret; -} - -static struct dp_netdev_flow * -mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, - const uint32_t mark) -{ - struct dp_netdev_flow *flow; - unsigned int tid; - size_t hash; - - if (dp_offload_threads == NULL) { - return NULL; - } - - hash = hash_int(mark, 0); - for (tid = 0; tid < rte_flow_offload_thread_nb(); tid++) { - CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash, - &dp_offload_threads[tid].mark_to_flow) { - if (flow->mark == mark && flow->pmd_id == pmd->core_id && - flow->dead == false) { - return flow; - } - } - } - - return NULL; -} - -static struct dp_offload_thread_item * -dp_netdev_alloc_flow_offload(struct dp_netdev *dp, - struct dp_netdev_flow *flow, - int op) -{ - struct dp_offload_thread_item *item; - struct dp_offload_flow_item *flow_offload; - - item = xzalloc(sizeof *item + sizeof *flow_offload); - flow_offload = &item->data->flow; - - item->type = DP_OFFLOAD_FLOW; - item->dp = dp; - - flow_offload->flow = flow; - flow_offload->op = op; - - dp_netdev_flow_ref(flow); - - return item; } static void -dp_netdev_free_flow_offload__(struct dp_offload_thread_item *offload) +mark_to_flow_disassociate_all(struct dp_netdev *dp, const uint32_t mark) { - struct dp_offload_flow_item *flow_offload = &offload->data->flow; - - free(flow_offload->actions); - free(offload); -} - -static void -dp_netdev_free_flow_offload(struct dp_offload_thread_item *offload) -{ - struct dp_offload_flow_item *flow_offload = &offload->data->flow; - - dp_netdev_flow_unref(flow_offload->flow); - ovsrcu_postpone(dp_netdev_free_flow_offload__, offload); -} - -static void -dp_netdev_free_offload(struct dp_offload_thread_item *offload) -{ - switch (offload->type) { - case DP_OFFLOAD_FLOW: - dp_netdev_free_flow_offload(offload); - break; - case DP_OFFLOAD_FLUSH: - free(offload); - break; - default: - OVS_NOT_REACHED(); - }; -} - -static void -dp_netdev_append_offload(struct dp_offload_thread_item *offload, - unsigned int tid) -{ - dp_netdev_offload_init(); - - mpsc_queue_insert(&dp_offload_threads[tid].queue, &offload->node); - atomic_count_inc64(&dp_offload_threads[tid].enqueued_item); -} - -static void -dp_netdev_offload_flow_enqueue(struct dp_offload_thread_item *item) -{ - struct dp_offload_flow_item *flow_offload = &item->data->flow; - unsigned int tid; - - ovs_assert(item->type == DP_OFFLOAD_FLOW); - - tid = rte_flow_offload_ufid_to_thread_id(flow_offload->flow->mega_ufid); - dp_netdev_append_offload(item, tid); -} - -static int -dp_netdev_flow_offload_del(struct dp_offload_thread_item *item) -{ - return mark_to_flow_disassociate(item->dp, item->data->flow.flow); -} - -/* - * There are two flow offload operations here: addition and modification. - * - * For flow addition, this function does: - * - allocate a new flow mark id - * - perform hardware flow offload - * - associate the flow mark with flow and mega flow - * - * For flow modification, both flow mark and the associations are still - * valid, thus only item 2 needed. 
- */ -static int -dp_netdev_flow_offload_put(struct dp_offload_thread_item *item) -{ - struct dp_offload_flow_item *offload = &item->data->flow; - struct dp_netdev *dp = item->dp; - struct dp_netdev_flow *flow = offload->flow; - odp_port_t in_port = flow->flow.in_port.odp_port; - bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD - && flow->mark != INVALID_FLOW_MARK; - struct dpif_netdev_offload_info info; - struct netdev *port; - uint32_t mark; - int ret; + size_t hash = hash_int(mark, 0); + struct dp_netdev_flow *flow; - if (flow->dead) { - return -1; + if (OVS_UNLIKELY(mark == INVALID_FLOW_MARK)) { + return; } - if (modification) { - mark = flow->mark; - } else { - /* - * If a mega flow has already been offloaded (from other PMD - * instances), do not offload it again. - */ - mark = megaflow_to_mark_find(&flow->mega_ufid); - if (mark != INVALID_FLOW_MARK) { - VLOG_DBG("Flow has already been offloaded with mark %u\n", mark); - if (flow->mark != INVALID_FLOW_MARK) { - ovs_assert(flow->mark == mark); - } else { - mark_to_flow_associate(mark, flow); + ovs_mutex_lock(&dp->mark_to_flow_lock); + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash, &dp->mark_to_flow) { + if (flow->mark == mark) { + flow->mark = INVALID_FLOW_MARK; + cmap_remove(&dp->mark_to_flow, + CONST_CAST(struct cmap_node *, &flow->mark_node), + hash_int(mark, 0)); + dp_netdev_flow_unref(flow); } - return 0; - } - - mark = flow_mark_alloc(); - if (mark == INVALID_FLOW_MARK) { - VLOG_ERR("Failed to allocate flow mark!\n"); - return -1; - } - } - info.flow_mark = mark; - info.orig_in_port = offload->orig_in_port; - - port = dpif_netdev_offload_get_netdev_by_port_id(in_port); - if (!port) { - goto err_free; - } - - /* Taking a global 'port_rwlock' to fulfill thread safety - * restrictions regarding the netdev port mapping. */ - ovs_rwlock_rdlock(&dp->port_rwlock); - ret = netdev_offload_dpdk_flow_put( - port, &offload->match, CONST_CAST(struct nlattr *, offload->actions), - offload->actions_len, &flow->mega_ufid, &info, NULL); - ovs_rwlock_unlock(&dp->port_rwlock); - - if (ret) { - goto err_free; - } - - if (!modification) { - megaflow_to_mark_associate(&flow->mega_ufid, mark); - mark_to_flow_associate(mark, flow); - } - return 0; - -err_free: - if (!modification) { - flow_mark_free(mark); - } else { - mark_to_flow_disassociate(item->dp, flow); - } - return -1; -} - -static void -dp_offload_flow(struct dp_offload_thread_item *item) -{ - struct dp_offload_flow_item *flow_offload = &item->data->flow; - const char *op; - int ret; - - switch (flow_offload->op) { - case DP_NETDEV_FLOW_OFFLOAD_OP_ADD: - op = "add"; - ret = dp_netdev_flow_offload_put(item); - break; - case DP_NETDEV_FLOW_OFFLOAD_OP_MOD: - op = "modify"; - ret = dp_netdev_flow_offload_put(item); - break; - case DP_NETDEV_FLOW_OFFLOAD_OP_DEL: - op = "delete"; - ret = dp_netdev_flow_offload_del(item); - break; - default: - OVS_NOT_REACHED(); } - - VLOG_DBG("%s to %s netdev flow "UUID_FMT, - ret == 0 ? 
"succeed" : "failed", op, - UUID_ARGS((struct uuid *) &flow_offload->flow->mega_ufid)); + ovs_mutex_unlock(&dp->mark_to_flow_lock); } -static void -dp_offload_flush(struct dp_offload_thread_item *item) +static struct dp_netdev_flow * +mark_to_flow_find(const struct dp_netdev_pmd_thread *pmd, + const uint32_t mark) { - struct dp_offload_flush_item *flush = &item->data->flush; - - ovs_rwlock_rdlock(&item->dp->port_rwlock); - dpif_offload_netdev_flush_flows(flush->netdev); - ovs_rwlock_unlock(&item->dp->port_rwlock); - - ovs_barrier_block(flush->barrier); - - /* Allow the initiator thread to take again the port lock, - * before continuing offload operations in this thread. - */ - ovs_barrier_block(flush->barrier); -} - -#define DP_NETDEV_OFFLOAD_BACKOFF_MIN 1 -#define DP_NETDEV_OFFLOAD_BACKOFF_MAX 64 -#define DP_NETDEV_OFFLOAD_QUIESCE_INTERVAL_US (10 * 1000) /* 10 ms */ - -static void * -dp_netdev_flow_offload_main(void *arg) -{ - struct dp_offload_thread *ofl_thread = arg; - struct dp_offload_thread_item *offload; - struct mpsc_queue_node *node; - struct mpsc_queue *queue; - long long int latency_us; - long long int next_rcu; - long long int now; - uint64_t backoff; - - queue = &ofl_thread->queue; - mpsc_queue_acquire(queue); - - while (true) { - backoff = DP_NETDEV_OFFLOAD_BACKOFF_MIN; - while (mpsc_queue_tail(queue) == NULL) { - xnanosleep(backoff * 1E6); - if (backoff < DP_NETDEV_OFFLOAD_BACKOFF_MAX) { - backoff <<= 1; - } - } - - next_rcu = time_usec() + DP_NETDEV_OFFLOAD_QUIESCE_INTERVAL_US; - MPSC_QUEUE_FOR_EACH_POP (node, queue) { - offload = CONTAINER_OF(node, struct dp_offload_thread_item, node); - atomic_count_dec64(&ofl_thread->enqueued_item); - - switch (offload->type) { - case DP_OFFLOAD_FLOW: - dp_offload_flow(offload); - break; - case DP_OFFLOAD_FLUSH: - dp_offload_flush(offload); - break; - default: - OVS_NOT_REACHED(); - } - - now = time_usec(); - - latency_us = now - offload->timestamp; - mov_avg_cma_update(&ofl_thread->cma, latency_us); - mov_avg_ema_update(&ofl_thread->ema, latency_us); + struct dp_netdev_flow *flow; - dp_netdev_free_offload(offload); + CMAP_FOR_EACH_WITH_HASH (flow, mark_node, hash_int(mark, 0), + &pmd->dp->mark_to_flow) { - /* Do RCU synchronization at fixed interval. 
*/ - if (now > next_rcu) { - ovsrcu_quiesce(); - next_rcu = time_usec() + DP_NETDEV_OFFLOAD_QUIESCE_INTERVAL_US; - } + if (flow->mark == mark && flow->pmd_id == pmd->core_id && + flow->dead == false) { + return flow; } } - - OVS_NOT_REACHED(); - mpsc_queue_release(queue); - return NULL; } -static void -queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, - struct dp_netdev_flow *flow) -{ - struct dp_offload_thread_item *offload; - - if (!dpif_offload_is_offload_enabled()) { - return; - } - - offload = dp_netdev_alloc_flow_offload(pmd->dp, flow, - DP_NETDEV_FLOW_OFFLOAD_OP_DEL); - offload->timestamp = pmd->ctx.now; - dp_netdev_offload_flow_enqueue(offload); -} - static void log_netdev_flow_change(const struct dp_netdev_flow *flow, const struct match *match, @@ -3078,28 +2571,53 @@ log_netdev_flow_change(const struct dp_netdev_flow *flow, } static void -queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, - struct dp_netdev_flow *flow, struct match *match, - const struct nlattr *actions, size_t actions_len, - int op) +offload_flow_del_resume(int error, uint32_t flow_mark) { - struct dp_offload_thread_item *item; - struct dp_offload_flow_item *flow_offload; + if (error == EINPROGRESS) { + return; + } + dpif_offload_free_flow_mark(flow_mark); +} - if (!dpif_offload_is_offload_enabled()) { +static void +offload_flow_del_resume_cb(void *aux_dp OVS_UNUSED, void *aux_flow OVS_UNUSED, + struct dpif_flow_stats *stats OVS_UNUSED, + uint32_t flow_mark, + int error) +{ + offload_flow_del_resume(error, flow_mark); +} + +static void +offload_flow_del(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow) +{ + odp_port_t in_port = flow->flow.in_port.odp_port; + + if (flow->mark == INVALID_FLOW_MARK) { return; } - item = dp_netdev_alloc_flow_offload(pmd->dp, flow, op); - flow_offload = &item->data->flow; - flow_offload->match = *match; - flow_offload->actions = xmalloc(actions_len); - nullable_memcpy(flow_offload->actions, actions, actions_len); - flow_offload->actions_len = actions_len; - flow_offload->orig_in_port = flow->orig_in_port; + mark_to_flow_disassociate(pmd->dp, flow); + + if (flow_mark_has_no_ref(pmd->dp, flow->mark) + && dpif_offload_is_offload_enabled()) { + struct dpif_offload_flow_del del = { + .in_port = in_port, + .ufid = CONST_CAST(ovs_u128 *, &flow->mega_ufid), + .stats = NULL, + .cb_data.callback = offload_flow_del_resume_cb, + }; + uint32_t mark; + int ret; - item->timestamp = pmd->ctx.now; - dp_netdev_offload_flow_enqueue(item); + ret = dpif_offload_datapath_flow_del(pmd->dp->full_name, &del, &mark); + if (ret && ret != EINPROGRESS) { + VLOG_DBG("Failed removing offload flow ufid "UUID_FMT + " from port %d", + UUID_ARGS((struct uuid *)&flow->mega_ufid), in_port); + } + offload_flow_del_resume(ret, mark); + } } static void @@ -3117,99 +2635,12 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, dp_netdev_simple_match_remove(pmd, flow); cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid)); ccmap_dec(&pmd->n_flows, odp_to_u32(in_port)); - queue_netdev_flow_del(pmd, flow); + offload_flow_del(pmd, flow); flow->dead = true; dp_netdev_flow_unref(flow); } -static void -dp_netdev_offload_flush_enqueue(struct dp_netdev *dp, - struct netdev *netdev, - struct ovs_barrier *barrier) -{ - unsigned int tid; - long long int now_us = time_usec(); - - for (tid = 0; tid < rte_flow_offload_thread_nb(); tid++) { - struct dp_offload_thread_item *item; - struct dp_offload_flush_item *flush; - - item = xmalloc(sizeof *item + sizeof *flush); - item->type = 
DP_OFFLOAD_FLUSH; - item->dp = dp; - item->timestamp = now_us; - - flush = &item->data->flush; - flush->netdev = netdev; - flush->barrier = barrier; - - dp_netdev_append_offload(item, tid); - } -} - -/* Blocking call that will wait on the offload thread to - * complete its work. As the flush order will only be - * enqueued after existing offload requests, those previous - * offload requests must be processed, which requires being - * able to lock the 'port_rwlock' from the offload thread. - * - * Flow offload flush is done when a port is being deleted. - * Right after this call executes, the offload API is disabled - * for the port. This call must be made blocking until the - * offload provider completed its job. - */ -static void -dp_netdev_offload_flush(struct dp_netdev *dp, - struct dp_netdev_port *port) - OVS_REQ_WRLOCK(dp->port_rwlock) -{ - /* The flush mutex serves to exclude mutual access to the static - * barrier, and to prevent multiple flush orders to several threads. - * - * The memory barrier needs to go beyond the function scope as - * the other threads can resume from blocking after this function - * already finished. - * - * Additionally, because the flush operation is blocking, it would - * deadlock if multiple offload threads were blocking on several - * different barriers. Only allow a single flush order in the offload - * queue at a time. - */ - static struct ovs_mutex flush_mutex = OVS_MUTEX_INITIALIZER; - static struct ovs_barrier barrier OVS_GUARDED_BY(flush_mutex); - struct netdev *netdev; - - if (!dpif_offload_is_offload_enabled()) { - return; - } - - ovs_rwlock_unlock(&dp->port_rwlock); - ovs_mutex_lock(&flush_mutex); - - /* This thread and the offload threads. */ - ovs_barrier_init(&barrier, 1 + rte_flow_offload_thread_nb()); - - netdev = netdev_ref(port->netdev); - dp_netdev_offload_flush_enqueue(dp, netdev, &barrier); - ovs_barrier_block(&barrier); - netdev_close(netdev); - - /* Take back the datapath port lock before allowing the offload - * threads to proceed further. The port deletion must complete first, - * to ensure no further offloads are inserted after the flush. - * - * Some offload provider (e.g. DPDK) keeps a netdev reference with - * the offload data. If this reference is not closed, the netdev is - * kept indefinitely. 
*/ - ovs_rwlock_wrlock(&dp->port_rwlock); - - ovs_barrier_block(&barrier); - ovs_barrier_destroy(&barrier); - - ovs_mutex_unlock(&flush_mutex); -} - static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd) { @@ -3649,112 +3080,7 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd, } static void -dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow, - const struct dpif_flow_stats *stats, - const struct dpif_flow_attrs *attrs, - int result) -{ - struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats; - struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs; - - atomic_store_relaxed(&netdev_flow->netdev_flow_get_result, result); - if (result) { - return; - } - - atomic_store_relaxed(&last_stats->used, stats->used); - atomic_store_relaxed(&last_stats->packet_count, stats->n_packets); - atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes); - atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags); - - atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded); - atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer); - -} - -static void -dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow, - struct dpif_flow_stats *stats, - struct dpif_flow_attrs *attrs, - int *result) -{ - struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats; - struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs; - - atomic_read_relaxed(&netdev_flow->netdev_flow_get_result, result); - if (*result) { - return; - } - - atomic_read_relaxed(&last_stats->used, &stats->used); - atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets); - atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes); - atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags); - - atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded); - atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer); -} - -static bool -dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, - struct dp_netdev_flow *netdev_flow, - struct dpif_flow_stats *stats, - struct dpif_flow_attrs *attrs) -{ - uint64_t act_buf[1024 / 8]; - struct nlattr *actions; - struct netdev *netdev; - struct match match; - struct ofpbuf buf; - - int ret = 0; - - if (!dpif_offload_is_offload_enabled()) { - return false; - } - - netdev = dpif_netdev_offload_get_netdev_by_port_id( - netdev_flow->flow.in_port.odp_port); - if (!netdev) { - return false; - } - ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf); - /* Taking a global 'port_rwlock' to fulfill thread safety - * restrictions regarding netdev port mapping. - * - * XXX: Main thread will try to pause/stop all revalidators during datapath - * reconfiguration via datapath purge callback (dp_purge_cb) while - * rw-holding 'dp->port_rwlock'. So we're not waiting for lock here. - * Otherwise, deadlock is possible, because revalidators might sleep - * waiting for the main thread to release the lock and main thread - * will wait for them to stop processing. - * This workaround might make statistics less accurate. Especially - * for flow deletion case, since there will be no other attempt. */ - if (!ovs_rwlock_tryrdlock(&dp->port_rwlock)) { - ret = netdev_offload_dpdk_flow_get(netdev, &match, &actions, - &netdev_flow->mega_ufid, stats, - attrs, &buf); - /* Storing statistics and attributes from the last request for - * later use on mutex contention. 
*/ - dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs, ret); - ovs_rwlock_unlock(&dp->port_rwlock); - } else { - dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs, &ret); - if (!ret && !attrs->dp_layer) { - /* Flow was never reported as 'offloaded' so it's harmless - * to continue to think so. */ - ret = EAGAIN; - } - } - if (ret) { - return false; - } - - return true; -} - -static void -get_dpif_flow_status(const struct dp_netdev *dp, +get_dpif_flow_status(const struct dp_netdev *dp OVS_UNUSED, const struct dp_netdev_flow *netdev_flow_, struct dpif_flow_stats *stats, struct dpif_flow_attrs *attrs) @@ -3777,8 +3103,10 @@ get_dpif_flow_status(const struct dp_netdev *dp, atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags); stats->tcp_flags = flags; - if (dpif_netdev_get_flow_offload_status(dp, netdev_flow, - &offload_stats, &offload_attrs)) { + if (dpif_offload_datapath_flow_stats(dp->full_name, + netdev_flow->flow.in_port.odp_port, + &netdev_flow->mega_ufid, + &offload_stats, &offload_attrs)) { stats->n_packets += offload_stats.n_packets; stats->n_bytes += offload_stats.n_bytes; stats->used = MAX(stats->used, offload_stats.used); @@ -4149,6 +3477,80 @@ dp_netdev_flow_is_simple_match(const struct match *match) return true; } +static void +offload_flow_put_resume(struct dp_netdev *dp, + struct dp_netdev_flow *flow, uint32_t flow_mark, + int error) +{ + if (error == EINPROGRESS) { + return; + } + + if (!error) { + if (flow_mark != INVALID_FLOW_MARK) { + if (flow->mark == INVALID_FLOW_MARK) { + mark_to_flow_associate(dp, flow_mark, flow); + } else if (flow->mark != flow_mark) { + /* The flow mark has changed. */ + mark_to_flow_disassociate_all(dp, flow->mark); + dpif_offload_free_flow_mark(flow->mark); + mark_to_flow_associate(dp, flow_mark, flow); + } + } + } else { + /* On error, no flow should be associated with this flow mark, + * and we should free it. */ + if (flow->mark != INVALID_FLOW_MARK) { + mark_to_flow_disassociate_all(dp, flow_mark); + dpif_offload_free_flow_mark(flow_mark); + } + } + dp_netdev_flow_unref(flow); +} + +static void +offload_flow_put_resume_cb(void *aux_dp, void *aux_flow, + struct dpif_flow_stats *stats OVS_UNUSED, + uint32_t flow_mark, + int error) +{ + struct dp_netdev *dp = aux_dp; + struct dp_netdev_flow *flow = aux_flow; + + offload_flow_put_resume(dp, flow, flow_mark, error); +} + +static void +offload_flow_put(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow, + struct match *match, const struct nlattr *actions, + size_t actions_len, bool modify) +{ + struct dpif_offload_flow_put put = { + .modify = modify, + .in_port = match->flow.in_port.odp_port, + .orig_in_port = flow->orig_in_port, + .ufid = CONST_CAST(ovs_u128 *, &flow->mega_ufid), + .match = match, + .actions = actions, + .actions_len = actions_len, + .stats = NULL, + .cb_data.callback = offload_flow_put_resume_cb, + .cb_data.callback_aux_dp = pmd->dp, + .cb_data.callback_aux_flow = flow, + }; + uint32_t flow_mark; + int error; + + if (!dpif_offload_is_offload_enabled()) { + return; + } + + dp_netdev_flow_ref(flow); + error = dpif_offload_datapath_flow_put(pmd->dp->full_name, &put, + &flow_mark); + offload_flow_put_resume(pmd->dp, flow, flow_mark, error); +} + static struct dp_netdev_flow * dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, struct match *match, const ovs_u128 *ufid, @@ -4185,9 +3587,6 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, /* Do not allocate extra space. 
*/ flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len); memset(&flow->stats, 0, sizeof flow->stats); - atomic_init(&flow->netdev_flow_get_result, 0); - memset(&flow->last_stats, 0, sizeof flow->last_stats); - memset(&flow->last_attrs, 0, sizeof flow->last_attrs); flow->dead = false; flow->batch = NULL; flow->mark = INVALID_FLOW_MARK; @@ -4225,8 +3624,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, dp_netdev_simple_match_insert(pmd, flow); } - queue_netdev_flow_put(pmd, flow, match, actions, actions_len, - DP_NETDEV_FLOW_OFFLOAD_OP_ADD); + offload_flow_put(pmd, flow, match, actions, actions_len, false); log_netdev_flow_change(flow, match, NULL, actions, actions_len); return flow; @@ -4286,9 +3684,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, old_actions = dp_netdev_flow_get_actions(netdev_flow); ovsrcu_set(&netdev_flow->actions, new_actions); - queue_netdev_flow_put(pmd, netdev_flow, match, - put->actions, put->actions_len, - DP_NETDEV_FLOW_OFFLOAD_OP_MOD); + offload_flow_put(pmd, netdev_flow, match, put->actions, + put->actions_len, true); log_netdev_flow_change(netdev_flow, match, old_actions, put->actions, put->actions_len); @@ -8408,7 +7805,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, size_t *n_flows, uint8_t *index_map, bool md_is_valid, odp_port_t port_no) { - const bool netdev_flow_api = dpif_offload_is_offload_enabled(); + const bool offload_enabled = dpif_offload_is_offload_enabled(); const uint32_t recirc_depth = *recirc_depth_get(); const size_t cnt = dp_packet_batch_size(packets_); size_t n_missed = 0, n_emc_hit = 0, n_phwol_hit = 0; @@ -8452,7 +7849,7 @@ dfc_processing(struct dp_netdev_pmd_thread *pmd, pkt_metadata_init(&packet->md, port_no); } - if (netdev_flow_api && recirc_depth == 0) { + if (offload_enabled && recirc_depth == 0) { if (OVS_UNLIKELY(dp_netdev_hw_flow(pmd, packet, &flow))) { /* Packet restoration failed and it was dropped, do not * continue processing. @@ -10467,86 +9864,3 @@ dpcls_lookup(struct dpcls *cls, const struct netdev_flow_key *keys[], } return false; } -/* XXX: Temporarily duplicates definition in dpif-offload-rte_flow.c. 
*/ -#define DEFAULT_OFFLOAD_THREAD_NB 1 -static unsigned int offload_thread_nb = DEFAULT_OFFLOAD_THREAD_NB; - -DECLARE_EXTERN_PER_THREAD_DATA(unsigned int, rte_flow_offload_thread_id); -DEFINE_EXTERN_PER_THREAD_DATA(rte_flow_offload_thread_id, OVSTHREAD_ID_UNSET); - -unsigned int -rte_flow_offload_thread_id(void) -{ - unsigned int id = *rte_flow_offload_thread_id_get(); - - if (OVS_UNLIKELY(id == OVSTHREAD_ID_UNSET)) { - id = rte_flow_offload_thread_init(); - } - - return id; -} - -unsigned int -rte_flow_offload_thread_nb(void) -{ - return offload_thread_nb; -} - -void -rte_flow_offload_thread_set_thread_nb(unsigned int thread_nb) -{ - offload_thread_nb = thread_nb; -} - -static unsigned int -rte_flow_offload_ufid_to_thread_id(const ovs_u128 ufid) -{ - uint32_t ufid_hash; - - if (rte_flow_offload_thread_nb() == 1) { - return 0; - } - - ufid_hash = hash_words64_inline( - (const uint64_t [2]){ ufid.u64.lo, - ufid.u64.hi }, 2, 1); - return ufid_hash % rte_flow_offload_thread_nb(); -} - -static unsigned int -rte_flow_offload_thread_init(void) -{ - static atomic_count next_id = ATOMIC_COUNT_INIT(0); - bool thread_is_hw_offload; - bool thread_is_rcu; - - thread_is_hw_offload = !strncmp(get_subprogram_name(), - "rte_offload", strlen("rte_offload")); - thread_is_rcu = !strncmp(get_subprogram_name(), "urcu", strlen("urcu")); - - /* Panic if any other thread besides offload and RCU tries - * to initialize their thread ID. */ - ovs_assert(thread_is_hw_offload || thread_is_rcu); - - if (*rte_flow_offload_thread_id_get() == OVSTHREAD_ID_UNSET) { - unsigned int id; - - if (thread_is_rcu) { - /* RCU will compete with other threads for shared object access. - * Reclamation functions using a thread ID must be thread-safe. - * For that end, and because RCU must consider all potential shared - * objects anyway, its thread-id can be whichever, so return 0. - */ - id = 0; - } else { - /* Only the actual offload threads have their own ID. */ - id = atomic_count_inc(&next_id); - } - /* Panic if any offload thread is getting a spurious ID. */ - ovs_assert(id < rte_flow_offload_thread_nb()); - return *rte_flow_offload_thread_id_get() = id; - } else { - return *rte_flow_offload_thread_id_get(); - } -} - diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 64bc5615f..6db6ed2e2 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -38,18 +38,6 @@ bool dpif_is_netdev(const struct dpif *); #define NR_QUEUE 1 #define NR_PMD_THREADS 1 -/* Flow offloading info structure. XXX: This needs to be moved once the - * implementation is migrated to dpif-offload-rte_flow. */ -struct dpif_netdev_offload_info { - /* - * The flow mark id assigned to the flow. If any pkts hit the flow, - * it will be in the pkt meta data. - */ - uint32_t flow_mark; - - odp_port_t orig_in_port; /* Originating in_port for tnl flows. */ -}; - #ifdef __cplusplus } #endif diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h index d65d17947..fe7ec9c3d 100644 --- a/lib/dpif-offload-provider.h +++ b/lib/dpif-offload-provider.h @@ -281,10 +281,6 @@ struct dpif_offload_class { * reasons. They are intended for use in fast path processing and should * be designed with speed and efficiency in mind. */ - /* Deletes all offloaded flows on this netdev. Return 0 if successful, - * otherwise returns a positive errno value. */ - int (*netdev_flow_flush)(const struct dpif_offload *, struct netdev *); - /* Recover the packet state (contents and data) for continued processing * in software. 
Return 0 if successful, otherwise returns a positive * errno value and takes ownership of a packet if errno != EOPNOTSUPP. */ diff --git a/lib/dpif-offload-rte_flow-private.h b/lib/dpif-offload-rte_flow-private.h new file mode 100644 index 000000000..41c17a7df --- /dev/null +++ b/lib/dpif-offload-rte_flow-private.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025 Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DPIF_OFFLOAD_RTE_FLOW_PRIVATE_H +#define DPIF_OFFLOAD_RTE_FLOW_PRIVATE_H + +/* Forward declarations of private structures. */ +struct dpif_offload_rte_flow; +struct netdev; + +/* DPIF offload RTE flow implementation-specific functions. + * These should only be used by the associated netdev offload provider, + * i.e., netdev-offload-dpdk. */ +unsigned int rte_flow_offload_thread_id(void); +struct netdev *dpif_offload_rte_get_netdev( + const struct dpif_offload_rte_flow *, odp_port_t port_no); +void dpif_offload_rte_traverse_ports( + const struct dpif_offload_rte_flow *offload, + bool (*cb)(struct netdev *, odp_port_t, void *), void *aux); + +#endif /* DPIF_OFFLOAD_RTE_FLOW_PRIVATE_H */ diff --git a/lib/dpif-offload-rte_flow.c b/lib/dpif-offload-rte_flow.c index 8a7ae7c23..b7499adbf 100644 --- a/lib/dpif-offload-rte_flow.c +++ b/lib/dpif-offload-rte_flow.c @@ -19,28 +19,90 @@ #include "dpif-offload.h" #include "dpif-offload-provider.h" +#include "dpif-offload-rte_flow-private.h" +#include "id-fpool.h" +#include "mov-avg.h" +#include "mpsc-queue.h" #include "netdev-offload-dpdk.h" #include "netdev-provider.h" #include "netdev-vport.h" #include "util.h" +#include "uuid.h" #include "openvswitch/json.h" +#include "openvswitch/match.h" #include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(dpif_offload_rte_flow); -#define DEFAULT_OFFLOAD_THREAD_NB 1 -#define MAX_OFFLOAD_THREAD_NB 10 +#define DEFAULT_OFFLOAD_THREAD_COUNT 1 +#define MAX_OFFLOAD_THREAD_COUNT 10 -static unsigned int offload_thread_nb = DEFAULT_OFFLOAD_THREAD_NB; +enum rte_offload_type { + RTE_OFFLOAD_FLOW, + RTE_OFFLOAD_FLUSH, +}; + +enum { + RTE_NETDEV_FLOW_OFFLOAD_OP_ADD, + RTE_NETDEV_FLOW_OFFLOAD_OP_MOD, + RTE_NETDEV_FLOW_OFFLOAD_OP_DEL, +}; + +struct rte_offload_thread { + PADDED_MEMBERS(CACHE_LINE_SIZE, + struct mpsc_queue queue; + atomic_uint64_t enqueued_item; + struct cmap megaflow_to_mark; + struct mov_avg_cma cma; + struct mov_avg_ema ema; + struct dpif_offload_rte_flow *offload; + pthread_t thread; + ); +}; + +struct rte_offload_flow_item { + int op; + odp_port_t in_port; + ovs_u128 ufid; + struct match match; + struct nlattr *actions; + size_t actions_len; + odp_port_t orig_in_port; /* Originating in_port for tunnel flows. 
*/ + bool requested_stats; + struct dpif_offload_flow_cb_data callback; +}; + +struct rte_offload_flush_item { + struct netdev *netdev; + struct dpif_offload_rte_flow *offload; + struct ovs_barrier *barrier; +}; + +union rte_offload_thread_data { + struct rte_offload_flow_item flow; + struct rte_offload_flush_item flush; +}; + +struct rte_offload_thread_item { + struct mpsc_queue_node node; + enum rte_offload_type type; + long long int timestamp; + union rte_offload_thread_data data[0]; +}; /* dpif offload interface for the rte implementation. */ struct dpif_offload_rte_flow { struct dpif_offload offload; struct dpif_offload_port_mgr *port_mgr; + atomic_count next_offload_thread_id; + atomic_bool offload_thread_shutdown; + struct rte_offload_thread *offload_threads; + /* Configuration specific variables. */ struct ovsthread_once once_enable; /* Track first-time enablement. */ + unsigned int offload_thread_count; /* Number of offload threads. */ }; static struct dpif_offload_rte_flow * @@ -50,14 +112,556 @@ dpif_offload_rte_cast(const struct dpif_offload *offload) return CONTAINER_OF(offload, struct dpif_offload_rte_flow, offload); } +DECLARE_EXTERN_PER_THREAD_DATA(unsigned int, rte_flow_offload_thread_id); +DEFINE_EXTERN_PER_THREAD_DATA(rte_flow_offload_thread_id, OVSTHREAD_ID_UNSET); + +unsigned int +rte_flow_offload_thread_id(void) +{ + unsigned int id = *rte_flow_offload_thread_id_get(); + + if (OVS_UNLIKELY(id == OVSTHREAD_ID_UNSET)) { + /* Offload threads get their ID set at initialization, here + * only the RCU thread might need initialization. */ + ovs_assert(!strncmp(get_subprogram_name(), "urcu", strlen("urcu"))); + + /* RCU will compete with other threads for shared object access. + * Reclamation functions using a thread ID must be thread-safe. + * For that end, and because RCU must consider all potential shared + * objects anyway, its thread-id can be whichever, so return 0. + */ + id = 0; + *rte_flow_offload_thread_id_get() = id; + } + + return id; +} + +static unsigned int +dpif_offload_rte_ufid_to_thread_id(struct dpif_offload_rte_flow *offload, + const ovs_u128 ufid) +{ + uint32_t ufid_hash; + + if (offload->offload_thread_count == 1) { + return 0; + } + + ufid_hash = hash_words64_inline( + (const uint64_t [2]){ ufid.u64.lo, + ufid.u64.hi }, 2, 1); + return ufid_hash % offload->offload_thread_count; +} + +struct megaflow_to_mark_data { + const struct cmap_node node; + ovs_u128 mega_ufid; + uint32_t mark; +}; + +static inline uint32_t +rte_offload_ufid_hash(const ovs_u128 *ufid) +{ + return ufid->u32[0]; +} + +/* Associate megaflow with a mark, which is a 1:1 mapping. */ +static void +megaflow_to_mark_associate(struct dpif_offload_rte_flow *offload, + const ovs_u128 *mega_ufid, uint32_t mark) +{ + uint32_t hash = rte_offload_ufid_hash(mega_ufid); + struct megaflow_to_mark_data *data = xzalloc(sizeof(*data)); + unsigned int tid = rte_flow_offload_thread_id(); + + data->mega_ufid = *mega_ufid; + data->mark = mark; + + cmap_insert(&offload->offload_threads[tid].megaflow_to_mark, + CONST_CAST(struct cmap_node *, &data->node), hash); +} + +/* Disassociate megaflow with a mark. 
*/ +static uint32_t +megaflow_to_mark_disassociate(struct dpif_offload_rte_flow *offload, + const ovs_u128 *mega_ufid) +{ + uint32_t hash = rte_offload_ufid_hash(mega_ufid); + struct megaflow_to_mark_data *data; + unsigned int tid = rte_flow_offload_thread_id(); + + CMAP_FOR_EACH_WITH_HASH (data, node, hash, + &offload->offload_threads[tid].megaflow_to_mark) { + if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) { + cmap_remove(&offload->offload_threads[tid].megaflow_to_mark, + CONST_CAST(struct cmap_node *, &data->node), hash); + ovsrcu_postpone(free, data); + return data->mark; + } + } + + VLOG_WARN("Masked ufid "UUID_FMT" is not associated with a mark?", + UUID_ARGS((struct uuid *) mega_ufid)); + + return INVALID_FLOW_MARK; +} + +static inline uint32_t +megaflow_to_mark_find(struct dpif_offload_rte_flow *offload, + const ovs_u128 *mega_ufid) +{ + uint32_t hash = rte_offload_ufid_hash(mega_ufid); + struct megaflow_to_mark_data *data; + unsigned int tid = rte_flow_offload_thread_id(); + + CMAP_FOR_EACH_WITH_HASH (data, node, hash, + &offload->offload_threads[tid].megaflow_to_mark) { + if (ovs_u128_equals(*mega_ufid, data->mega_ufid)) { + return data->mark; + } + } + + VLOG_DBG("Mark id for ufid "UUID_FMT" was not found", + UUID_ARGS((struct uuid *) mega_ufid)); + return INVALID_FLOW_MARK; +} + +static bool +dpif_offload_rte_is_offloading_netdev(struct dpif_offload_rte_flow *offload, + struct netdev *netdev) +{ + const struct dpif_offload *netdev_offload; + + netdev_offload = ovsrcu_get(const struct dpif_offload *, + &netdev->dpif_offload); + + return netdev_offload == &offload->offload; +} + +static struct rte_offload_thread_item * +dpif_offload_rte_alloc_flow_offload(int op) +{ + struct rte_offload_thread_item *item; + struct rte_offload_flow_item *flow_offload; + + item = xzalloc(sizeof *item + sizeof *flow_offload); + flow_offload = &item->data->flow; + + item->type = RTE_OFFLOAD_FLOW; + flow_offload->op = op; + + return item; +} + +static void +dpif_offload_rte_free_flow_offload__(struct rte_offload_thread_item *offload) +{ + struct rte_offload_flow_item *flow_offload = &offload->data->flow; + + free(flow_offload->actions); + free(offload); +} + +static void +dpif_offload_rte_free_flow_offload(struct rte_offload_thread_item *offload) +{ + ovsrcu_postpone(dpif_offload_rte_free_flow_offload__, offload); +} + +static void +dpif_offload_rte_free_offload(struct rte_offload_thread_item *offload) +{ + switch (offload->type) { + case RTE_OFFLOAD_FLOW: + dpif_offload_rte_free_flow_offload(offload); + break; + case RTE_OFFLOAD_FLUSH: + free(offload); + break; + default: + OVS_NOT_REACHED(); + }; +} + +static void +dpif_offload_rte_append_offload(const struct dpif_offload_rte_flow *offload, + struct rte_offload_thread_item *item, + unsigned int tid) +{ + ovs_assert(offload->offload_threads); + + mpsc_queue_insert(&offload->offload_threads[tid].queue, &item->node); + atomic_count_inc64(&offload->offload_threads[tid].enqueued_item); +} + +static void +dpif_offload_rte_offload_flow_enqueue(struct dpif_offload_rte_flow *offload, + struct rte_offload_thread_item *item) +{ + struct rte_offload_flow_item *flow_offload = &item->data->flow; + unsigned int tid; + + ovs_assert(item->type == RTE_OFFLOAD_FLOW); + + tid = dpif_offload_rte_ufid_to_thread_id(offload, flow_offload->ufid); + dpif_offload_rte_append_offload(offload, item, tid); +} + +static int +dpif_offload_rte_flow_offload_del(struct rte_offload_thread *thread, + struct rte_offload_thread_item *item) +{ + struct rte_offload_flow_item *flow = 
&item->data->flow; + uint32_t mark = INVALID_FLOW_MARK; + struct dpif_flow_stats stats; + struct netdev *netdev; + int error; + + netdev = dpif_offload_rte_get_netdev(thread->offload, flow->in_port); + + if (!netdev) { + VLOG_DBG("Failed to find netdev for port_id %d", flow->in_port); + error = ENODEV; + goto do_callback; + } + + error = netdev_offload_dpdk_flow_del(netdev, &flow->ufid, + flow->requested_stats ? &stats + : NULL); + + /* Disassociate the mark on error as well, since the main reason for the + * error is that the flow is no longer in hardware. */ + mark = megaflow_to_mark_disassociate(thread->offload, &flow->ufid); + +do_callback: + dpif_offload_datapath_flow_op_continue(&flow->callback, + flow->requested_stats ? &stats + : NULL, + mark, error); + return error; +} + static int -dpif_offload_rte_enable_offload(struct dpif_offload *dpif_offload, +dpif_offload_rte_flow_offload_put(struct rte_offload_thread *thread, + struct rte_offload_thread_item *item, + bool modify) +{ + struct rte_offload_flow_item *flow = &item->data->flow; + struct dpif_flow_stats stats; + struct netdev *netdev; + uint32_t mark; + int error = 0; + + mark = megaflow_to_mark_find(thread->offload, &flow->ufid); + if (modify) { + if (mark == INVALID_FLOW_MARK) { + /* We have not offloaded this flow, so we can not modify it. */ + error = ENOENT; + goto do_callback; + } + } else { + if (mark != INVALID_FLOW_MARK) { + VLOG_DBG("Flow has already been offloaded with mark %u", mark); + goto do_callback; + } + + mark = dpif_offload_allocate_flow_mark(); + if (mark == INVALID_FLOW_MARK) { + VLOG_ERR("Failed to allocate flow mark!"); + error = ENOSPC; + goto do_callback; + } + } + + netdev = dpif_offload_rte_get_netdev(thread->offload, flow->in_port); + + if (!netdev) { + VLOG_DBG("Failed to find netdev for port_id %d", flow->in_port); + error = ENODEV; + goto do_callback; + } + + if (!dpif_offload_rte_is_offloading_netdev(thread->offload, netdev)) { + error = EUNATCH; + goto do_callback; + } + + error = netdev_offload_dpdk_flow_put( + thread->offload, netdev, &flow->match, + CONST_CAST(struct nlattr *, flow->actions), flow->actions_len, + &flow->ufid, mark, flow->orig_in_port, + flow->requested_stats ? &stats : NULL); + +do_callback: + if (!error && !modify) { + megaflow_to_mark_associate(thread->offload, &flow->ufid, mark); + } else if (error) { + if (modify) { + /* We failed the modification, so the flow is no longer + * installed, remove the mapping. */ + if (mark != INVALID_FLOW_MARK) { + megaflow_to_mark_disassociate(thread->offload, &flow->ufid); + } + } else if (mark != INVALID_FLOW_MARK) { + /* We allocated a mark, but it was not used. */ + dpif_offload_free_flow_mark(mark); + mark = INVALID_FLOW_MARK; + } + } + + dpif_offload_datapath_flow_op_continue(&flow->callback, + flow->requested_stats ? 
&stats + : NULL, + mark, error); + return error; +} + +static void +dpif_offload_rte_offload_flow(struct rte_offload_thread *thread, + struct rte_offload_thread_item *item) +{ + struct rte_offload_flow_item *flow_offload = &item->data->flow; + const char *op; + int ret; + + switch (flow_offload->op) { + case RTE_NETDEV_FLOW_OFFLOAD_OP_ADD: + op = "add"; + ret = dpif_offload_rte_flow_offload_put(thread, item, false); + break; + case RTE_NETDEV_FLOW_OFFLOAD_OP_MOD: + op = "modify"; + ret = dpif_offload_rte_flow_offload_put(thread, item, true); + break; + case RTE_NETDEV_FLOW_OFFLOAD_OP_DEL: + op = "delete"; + ret = dpif_offload_rte_flow_offload_del(thread, item); + break; + default: + OVS_NOT_REACHED(); + } + + VLOG_DBG("%s to %s netdev flow "UUID_FMT, + ret == 0 ? "succeed" : "failed", op, + UUID_ARGS((struct uuid *) &flow_offload->ufid)); +} + +static void +dpif_offload_rte_offload_flush(struct rte_offload_thread_item *item) +{ + struct rte_offload_flush_item *flush = &item->data->flush; + + netdev_offload_dpdk_flow_flush(flush->offload, flush->netdev); + ovs_barrier_block(flush->barrier); +} + +#define RTE_OFFLOAD_BACKOFF_MIN 1 +#define RTE_OFFLOAD_BACKOFF_MAX 64 +#define RTE_OFFLOAD_QUIESCE_INTERVAL_US (10 * 1000) /* 10 ms */ + +static void * +dpif_offload_rte_offload_thread_main(void *arg) +{ + struct rte_offload_thread *ofl_thread = arg; + struct rte_offload_thread_item *offload; + struct mpsc_queue_node *node; + struct mpsc_queue *queue; + long long int latency_us; + long long int next_rcu; + uint64_t backoff; + bool exiting; + + if (*rte_flow_offload_thread_id_get() == OVSTHREAD_ID_UNSET) { + unsigned int id; + + id = atomic_count_inc(&ofl_thread->offload->next_offload_thread_id); + + /* Panic if any offload thread is getting a spurious ID. */ + ovs_assert(id < ofl_thread->offload->offload_thread_count); + + *rte_flow_offload_thread_id_get() = id; + } + + queue = &ofl_thread->queue; + mpsc_queue_acquire(queue); + + do { + long long int now; + + backoff = RTE_OFFLOAD_BACKOFF_MIN; + while (mpsc_queue_tail(queue) == NULL) { + xnanosleep(backoff * 1E6); + if (backoff < RTE_OFFLOAD_BACKOFF_MAX) { + backoff <<= 1; + } + + atomic_read_relaxed(&ofl_thread->offload->offload_thread_shutdown, + &exiting); + if (exiting) { + goto exit_thread; + } + } + + now = time_usec(); + next_rcu = now + RTE_OFFLOAD_QUIESCE_INTERVAL_US; + MPSC_QUEUE_FOR_EACH_POP (node, queue) { + offload = CONTAINER_OF(node, struct rte_offload_thread_item, node); + atomic_count_dec64(&ofl_thread->enqueued_item); + + switch (offload->type) { + case RTE_OFFLOAD_FLOW: + dpif_offload_rte_offload_flow(ofl_thread, offload); + break; + case RTE_OFFLOAD_FLUSH: + dpif_offload_rte_offload_flush(offload); + break; + default: + OVS_NOT_REACHED(); + } + + now = time_usec(); + latency_us = now - offload->timestamp; + mov_avg_cma_update(&ofl_thread->cma, latency_us); + mov_avg_ema_update(&ofl_thread->ema, latency_us); + + dpif_offload_rte_free_offload(offload); + + /* Do RCU synchronization at fixed interval. 
*/ + if (now > next_rcu) { + ovsrcu_quiesce(); + next_rcu = time_usec() + RTE_OFFLOAD_QUIESCE_INTERVAL_US; + } + } + + atomic_read_relaxed(&ofl_thread->offload->offload_thread_shutdown, + &exiting); + } while (!exiting); + +exit_thread: + mpsc_queue_release(queue); + return NULL; +} + +static void +dpif_offload_rte_offload_threads_init(struct dpif_offload_rte_flow *offload) +{ + offload->offload_threads = xcalloc(offload->offload_thread_count, + sizeof(struct rte_offload_thread)); + + for (unsigned int tid = 0; tid < offload->offload_thread_count; tid++) { + struct rte_offload_thread *thread; + + thread = &offload->offload_threads[tid]; + mpsc_queue_init(&thread->queue); + cmap_init(&thread->megaflow_to_mark); + atomic_init(&thread->enqueued_item, 0); + mov_avg_cma_init(&thread->cma); + mov_avg_ema_init(&thread->ema, 100); + thread->offload = offload; + thread->thread = ovs_thread_create( + "rte_offload", dpif_offload_rte_offload_thread_main, thread); + } +} + +static long long int +dpif_offload_rte_get_timestamp(void) +{ + /* XXX: We should look for a better, more efficient way to obtain a + * timestamp in the fast path, if only used for gathering statistics. */ + return time_usec(); +} + +static void +dpif_offload_rte_flush_enqueue(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, + struct ovs_barrier *barrier) +{ + unsigned int tid; + long long int now_us = dpif_offload_rte_get_timestamp(); + + if (!dpif_offload_is_offload_enabled()) { + return; + } + + for (tid = 0; tid < offload->offload_thread_count; tid++) { + struct rte_offload_thread_item *item; + struct rte_offload_flush_item *flush; + + item = xmalloc(sizeof *item + sizeof *flush); + item->type = RTE_OFFLOAD_FLUSH; + item->timestamp = now_us; + + flush = &item->data->flush; + flush->netdev = netdev; + flush->offload = offload; + flush->barrier = barrier; + + dpif_offload_rte_append_offload(offload, item, tid); + } +} + +/* Blocking call that will wait on the offload thread to + * complete its work. As the flush order will only be + * enqueued after existing offload requests, those previous + * offload requests must be processed. + * + * Flow offload flush is done when a port is being deleted. + * Right before this call executes, the offload API is disabled + * for the port. This call must be made blocking until the + * offload provider completed its job. + */ +static void +dpif_offload_rte_flush(struct dpif_offload_rte_flow *offload, + struct netdev *netdev) +{ + /* The flush mutex serves to exclude mutual access to the static + * barrier, and to prevent multiple flush orders to several threads. + * + * The memory barrier needs to go beyond the function scope as + * the other threads can resume from blocking after this function + * already finished. + * + * Additionally, because the flush operation is blocking, it would + * deadlock if multiple offload threads were blocking on several + * different barriers. Only allow a single flush order in the offload + * queue at a time. 
+ */ + static struct ovs_mutex flush_mutex = OVS_MUTEX_INITIALIZER; + static struct ovs_barrier barrier OVS_GUARDED_BY(flush_mutex); + + ovs_mutex_lock(&flush_mutex); + + ovs_barrier_init(&barrier, 1 + offload->offload_thread_count); + + dpif_offload_rte_flush_enqueue(offload, netdev, &barrier); + ovs_barrier_block(&barrier); + ovs_barrier_destroy(&barrier); + + ovs_mutex_unlock(&flush_mutex); +} + +void dpif_offload_rte_traverse_ports( + const struct dpif_offload_rte_flow *offload, + bool (*cb)(struct netdev *, odp_port_t, void *), void *aux) +{ + struct dpif_offload_port_mgr_port *port; + + DPIF_OFFLOAD_PORT_MGR_PORT_FOR_EACH (port, offload->port_mgr) { + if (cb(port->netdev, port->port_no, aux)) { + break; + } + } +} + +static int +dpif_offload_rte_enable_offload(struct dpif_offload *offload_, struct dpif_offload_port_mgr_port *port) { + struct dpif_offload_rte_flow *offload = dpif_offload_rte_cast(offload_); struct netdev *netdev = port->netdev; - netdev_offload_dpdk_init(netdev); - dpif_offload_set_netdev_offload(netdev, dpif_offload); + netdev_offload_dpdk_init(netdev, offload->offload_thread_count); + dpif_offload_set_netdev_offload(netdev, offload_); return 0; } @@ -98,6 +702,18 @@ dpif_offload_rte_port_del(struct dpif_offload *offload, odp_port_t port_no) struct dpif_offload_port_mgr_port *port; int ret = 0; + port = dpif_offload_port_mgr_find_by_odp_port(offload_rte->port_mgr, + port_no); + + if (dpif_offload_is_offload_enabled() && port) { + /* If hardware offload is enabled, we first need to flush (complete) + * all pending flow operations, especially the pending delete ones, + * before we remove the netdev from the port_mgr list. + */ + dpif_offload_set_netdev_offload(port->netdev, NULL); + dpif_offload_rte_flush(offload_rte, port->netdev); + } + port = dpif_offload_port_mgr_remove(offload_rte->port_mgr, port_no, true); if (port) { if (dpif_offload_is_offload_enabled()) { @@ -138,14 +754,13 @@ dpif_offload_rte_port_dump_done(const struct dpif_offload *offload_, return dpif_offload_port_mgr_port_dump_done(offload->port_mgr, state); } -static struct netdev * -dpif_offload_rte_flow_get_netdev(struct dpif_offload *offload, - odp_port_t port_no) +struct netdev * +dpif_offload_rte_get_netdev(const struct dpif_offload_rte_flow *offload, + odp_port_t port_no) { - struct dpif_offload_rte_flow *offload_rte = dpif_offload_rte_cast(offload); struct dpif_offload_port_mgr_port *port; - port = dpif_offload_port_mgr_find_by_odp_port(offload_rte->port_mgr, + port = dpif_offload_port_mgr_find_by_odp_port(offload->port_mgr, port_no); if (!port) { return NULL; @@ -154,6 +769,15 @@ dpif_offload_rte_flow_get_netdev(struct dpif_offload *offload, return port->netdev; } +static struct netdev * +dpif_offload_rte_get_netdev_(struct dpif_offload *offload, + odp_port_t port_no) +{ + struct dpif_offload_rte_flow *offload_rte = dpif_offload_rte_cast(offload); + + return dpif_offload_rte_get_netdev(offload_rte, port_no); +} + static int dpif_offload_rte_open(const struct dpif_offload_class *offload_class, struct dpif *dpif, struct dpif_offload **dpif_offload) @@ -168,6 +792,11 @@ dpif_offload_rte_open(const struct dpif_offload_class *offload_class, OVSTHREAD_ONCE_INITIALIZER; *dpif_offload = &offload_rte->offload; + offload_rte->offload_thread_count = DEFAULT_OFFLOAD_THREAD_COUNT; + offload_rte->offload_threads = NULL; + atomic_count_init(&offload_rte->next_offload_thread_id, 0); + atomic_init(&offload_rte->offload_thread_shutdown, false); + return 0; } @@ -193,6 +822,15 @@ dpif_offload_rte_close(struct 
dpif_offload *dpif_offload) dpif_offload); dpif_offload_port_mgr_uninit(offload_rte->port_mgr); + + atomic_store_relaxed(&offload_rte->offload_thread_shutdown, true); + if (offload_rte->offload_threads) { + for (int i = 0; i < offload_rte->offload_thread_count; i++) { + xpthread_join(offload_rte->offload_threads[i].thread, NULL); + mpsc_queue_destroy(&offload_rte->offload_threads[i].queue); + cmap_destroy(&offload_rte->offload_threads[i].megaflow_to_mark); + } + } free(offload_rte); } @@ -203,44 +841,39 @@ static bool dpif_offload_rte_late_enable(struct dpif_offload_port_mgr_port *p, return false; } -/* XXX: External reference, will be removed after full integration. */ -void rte_flow_offload_thread_set_thread_nb(unsigned int thread_nb); - static void -dpif_offload_rte_set_config(struct dpif_offload *offload, +dpif_offload_rte_set_config(struct dpif_offload *offload_, const struct smap *other_cfg) { - struct dpif_offload_rte_flow *offload_rte = dpif_offload_rte_cast(offload); + struct dpif_offload_rte_flow *offload = dpif_offload_rte_cast(offload_); /* We maintain the existing behavior where global configurations * are only accepted when hardware offload is initially enabled. * Once enabled, they cannot be updated or reconfigured. */ if (smap_get_bool(other_cfg, "hw-offload", false)) { - if (ovsthread_once_start(&offload_rte->once_enable)) { + if (ovsthread_once_start(&offload->once_enable)) { - offload_thread_nb = smap_get_ullong(other_cfg, - "n-offload-threads", - DEFAULT_OFFLOAD_THREAD_NB); - if (offload_thread_nb == 0 || - offload_thread_nb > MAX_OFFLOAD_THREAD_NB) { + unsigned int offload_thread_count = smap_get_uint( + other_cfg, "n-offload-threads", DEFAULT_OFFLOAD_THREAD_COUNT); + + if (offload_thread_count == 0 || + offload_thread_count > MAX_OFFLOAD_THREAD_COUNT) { VLOG_WARN("netdev: Invalid number of threads requested: %u", - offload_thread_nb); - offload_thread_nb = DEFAULT_OFFLOAD_THREAD_NB; + offload_thread_count); + offload_thread_count = DEFAULT_OFFLOAD_THREAD_COUNT; } - if (smap_get(other_cfg, "n-offload-threads")) { - VLOG_INFO("Flow API using %u thread%s", - offload_thread_nb, - offload_thread_nb > 1 ? "s" : ""); - } + VLOG_INFO("Flow API using %u thread%s", offload_thread_count, + offload_thread_count > 1 ? 
"s" : ""); - rte_flow_offload_thread_set_thread_nb(offload_thread_nb); + offload->offload_thread_count = offload_thread_count; - dpif_offload_port_mgr_traverse_ports(offload_rte->port_mgr, + dpif_offload_rte_offload_threads_init(offload); + dpif_offload_port_mgr_traverse_ports(offload->port_mgr, dpif_offload_rte_late_enable, offload); - ovsthread_once_done(&offload_rte->once_enable); + ovsthread_once_done(&offload->once_enable); } } } @@ -310,13 +943,22 @@ dpif_offload_rte_can_offload(struct dpif_offload *dpif_offload OVS_UNUSED, return netdev_dpdk_flow_api_supported(netdev, true); } +struct get_n_offload_cb_aux { + uint64_t *total; + union { + unsigned int offload_thread_count; + unsigned int offload_thread_id; + }; +}; + static bool dpif_offload_rte_flow_get_n_offloaded_cb( - struct dpif_offload_port_mgr_port *port, void *aux) + struct dpif_offload_port_mgr_port *port, void *aux_) { - uint64_t *total = aux; + struct get_n_offload_cb_aux *aux = aux_; - *total += netdev_offload_dpdk_flow_get_n_offloaded(port->netdev); + *aux->total += netdev_offload_dpdk_flow_get_n_offloaded( + port->netdev, aux->offload_thread_count); return false; } @@ -326,30 +968,237 @@ dpif_offload_rte_flow_get_n_offloaded(const struct dpif_offload *offload) struct dpif_offload_rte_flow *offload_rte = dpif_offload_rte_cast(offload); uint64_t total = 0; + struct get_n_offload_cb_aux aux = { + .offload_thread_count = offload_rte->offload_thread_count, + .total = &total, + }; + if (!dpif_offload_is_offload_enabled()) { return 0; } dpif_offload_port_mgr_traverse_ports( offload_rte->port_mgr, dpif_offload_rte_flow_get_n_offloaded_cb, - &total); + &aux); return total; } -static int -dpif_offload_rte_netdev_flow_flush(const struct dpif_offload *offload - OVS_UNUSED, struct netdev *netdev) +static bool +dpif_offload_rte_flow_get_n_offloaded_by_thread_cb( + struct dpif_offload_port_mgr_port *port, void *aux_) { - return netdev_offload_dpdk_flow_flush(netdev); + struct get_n_offload_cb_aux *aux = aux_; + + *aux->total += netdev_offload_dpdk_flow_get_n_offloaded_by_thread( + port->netdev, aux->offload_thread_id); + return false; +} + +static uint64_t +dpif_offload_rte_flow_get_n_offloaded_by_thread( + struct dpif_offload_rte_flow *offload, unsigned int tid) +{ + uint64_t total = 0; + + struct get_n_offload_cb_aux aux = { + .offload_thread_id = tid, + .total = &total, + }; + + if (!dpif_offload_is_offload_enabled()) { + return 0; + } + + dpif_offload_port_mgr_traverse_ports( + offload->port_mgr, + dpif_offload_rte_flow_get_n_offloaded_by_thread_cb, + &aux); + + return total; } static int dpif_offload_rte_netdev_hw_miss_packet_recover( - const struct dpif_offload *offload OVS_UNUSED, struct netdev *netdev, + const struct dpif_offload *offload_, struct netdev *netdev, struct dp_packet *packet) { - return netdev_offload_dpdk_hw_miss_packet_recover(netdev, packet); + struct dpif_offload_rte_flow *offload = dpif_offload_rte_cast(offload_); + + return netdev_offload_dpdk_hw_miss_packet_recover(offload, netdev, packet); +} + +static int +dpif_offload_rte_netdev_flow_put(const struct dpif_offload *offload_, + struct netdev *netdev OVS_UNUSED, + struct dpif_offload_flow_put *put, + uint32_t *flow_mark) +{ + struct dpif_offload_rte_flow *offload = dpif_offload_rte_cast(offload_); + struct rte_offload_thread_item *item; + struct rte_offload_flow_item *flow_offload; + + item = dpif_offload_rte_alloc_flow_offload( + put->modify ? 
RTE_NETDEV_FLOW_OFFLOAD_OP_MOD + : RTE_NETDEV_FLOW_OFFLOAD_OP_ADD); + item->timestamp = dpif_offload_rte_get_timestamp(); + + flow_offload = &item->data->flow; + flow_offload->in_port = put->in_port; + flow_offload->ufid = *put->ufid; + flow_offload->match = *put->match; + flow_offload->actions = xmalloc(put->actions_len); + nullable_memcpy(flow_offload->actions, put->actions, put->actions_len); + flow_offload->actions_len = put->actions_len; + flow_offload->orig_in_port = put->orig_in_port; + flow_offload->requested_stats = !!put->stats; + flow_offload->callback = put->cb_data; + + dpif_offload_rte_offload_flow_enqueue(offload, item); + + *flow_mark = INVALID_FLOW_MARK; + return EINPROGRESS; +} + +static int +dpif_offload_rte_netdev_flow_del(const struct dpif_offload *offload_, + struct netdev *netdev OVS_UNUSED, + struct dpif_offload_flow_del *del, + uint32_t *flow_mark) +{ + struct dpif_offload_rte_flow *offload = dpif_offload_rte_cast(offload_); + struct rte_offload_thread_item *item; + struct rte_offload_flow_item *flow_offload; + + item = dpif_offload_rte_alloc_flow_offload(RTE_NETDEV_FLOW_OFFLOAD_OP_DEL); + item->timestamp =dpif_offload_rte_get_timestamp(); + + flow_offload = &item->data->flow; + flow_offload->in_port = del->in_port; + flow_offload->requested_stats = !!del->stats; + flow_offload->ufid = *del->ufid; + flow_offload->callback = del->cb_data; + + dpif_offload_rte_offload_flow_enqueue(offload, item); + + *flow_mark = INVALID_FLOW_MARK; + return EINPROGRESS; +} + +static bool +dpif_offload_rte_netdev_flow_stats(const struct dpif_offload *ol OVS_UNUSED, + struct netdev *netdev, + const ovs_u128 *ufid, + struct dpif_flow_stats *stats, + struct dpif_flow_attrs *attrs) +{ + uint64_t act_buf[1024 / 8]; + struct nlattr *actions; + struct match match; + struct ofpbuf buf; + + ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf); + + return !netdev_offload_dpdk_flow_get(netdev, &match, &actions, + ufid, stats, attrs, &buf); +} + +static int +dpif_offload_rte_get_global_stats(const struct dpif_offload *offload_, + struct netdev_custom_stats *stats) +{ + struct dpif_offload_rte_flow *offload = dpif_offload_rte_cast(offload_); + unsigned int nb_thread = offload->offload_thread_count; + struct rte_offload_thread *offload_threads = offload->offload_threads; + unsigned int tid; + size_t i; + + enum { + DP_NETDEV_HW_OFFLOADS_STATS_ENQUEUED, + DP_NETDEV_HW_OFFLOADS_STATS_INSERTED, + DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN, + DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV, + DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN, + DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV, + }; + struct { + const char *name; + uint64_t total; + } hwol_stats[] = { + [DP_NETDEV_HW_OFFLOADS_STATS_ENQUEUED] = + { " Enqueued offloads", 0 }, + [DP_NETDEV_HW_OFFLOADS_STATS_INSERTED] = + { " Inserted offloads", 0 }, + [DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN] = + { " Cumulative Average latency (us)", 0 }, + [DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV] = + { " Cumulative Latency stddev (us)", 0 }, + [DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN] = + { " Exponential Average latency (us)", 0 }, + [DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV] = + { " Exponential Latency stddev (us)", 0 }, + }; + + if (!dpif_offload_is_offload_enabled() || !nb_thread) { + /* Leave stats structure untouched on error per API guidelines. */ + return EINVAL; + } + + stats->label = xstrdup(dpif_offload_name(offload_)); + + /* nb_thread counters for the overall total as well. 
*/ + stats->size = ARRAY_SIZE(hwol_stats) * (nb_thread + 1); + stats->counters = xcalloc(stats->size, sizeof *stats->counters); + + for (tid = 0; tid < nb_thread; tid++) { + uint64_t counts[ARRAY_SIZE(hwol_stats)]; + size_t idx = ((tid + 1) * ARRAY_SIZE(hwol_stats)); + + memset(counts, 0, sizeof counts); + if (offload_threads != NULL) { + counts[DP_NETDEV_HW_OFFLOADS_STATS_INSERTED] = + dpif_offload_rte_flow_get_n_offloaded_by_thread(offload, tid); + + atomic_read_relaxed(&offload_threads[tid].enqueued_item, + &counts[DP_NETDEV_HW_OFFLOADS_STATS_ENQUEUED]); + + counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN] = + mov_avg_cma(&offload_threads[tid].cma); + counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV] = + mov_avg_cma_std_dev(&offload_threads[tid].cma); + + counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN] = + mov_avg_ema(&offload_threads[tid].ema); + counts[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV] = + mov_avg_ema_std_dev(&offload_threads[tid].ema); + } + + for (i = 0; i < ARRAY_SIZE(hwol_stats); i++) { + snprintf(stats->counters[idx + i].name, + sizeof(stats->counters[idx + i].name), + " [%3u] %s", tid, hwol_stats[i].name); + stats->counters[idx + i].value = counts[i]; + hwol_stats[i].total += counts[i]; + } + } + + /* Do an average of the average for the aggregate. */ + hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_MEAN].total /= nb_thread; + hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_CMA_STDDEV].total /= nb_thread; + hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_MEAN].total /= nb_thread; + hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_LAT_EMA_STDDEV].total /= nb_thread; + + /* Get the total offload count. */ + hwol_stats[DP_NETDEV_HW_OFFLOADS_STATS_INSERTED].total = + dpif_offload_rte_flow_get_n_offloaded(offload_); + + for (i = 0; i < ARRAY_SIZE(hwol_stats); i++) { + snprintf(stats->counters[i].name, sizeof(stats->counters[i].name), + " Total %s", hwol_stats[i].name); + stats->counters[i].value = hwol_stats[i].total; + } + + return 0; } struct dpif_offload_class dpif_offload_rte_flow_class = { @@ -362,6 +1211,7 @@ struct dpif_offload_class dpif_offload_rte_flow_class = { .close = dpif_offload_rte_close, .set_config = dpif_offload_rte_set_config, .get_debug = dpif_offload_rte_get_debug, + .get_global_stats = dpif_offload_rte_get_global_stats, .can_offload = dpif_offload_rte_can_offload, .port_add = dpif_offload_rte_port_add, .port_del = dpif_offload_rte_port_del, @@ -369,8 +1219,10 @@ struct dpif_offload_class dpif_offload_rte_flow_class = { .port_dump_next = dpif_offload_rte_port_dump_next, .port_dump_done = dpif_offload_rte_port_dump_done, .flow_get_n_offloaded = dpif_offload_rte_flow_get_n_offloaded, - .get_netdev = dpif_offload_rte_flow_get_netdev, - .netdev_flow_flush = dpif_offload_rte_netdev_flow_flush, + .get_netdev = dpif_offload_rte_get_netdev_, .netdev_hw_miss_packet_recover = \ dpif_offload_rte_netdev_hw_miss_packet_recover, + .netdev_flow_put = dpif_offload_rte_netdev_flow_put, + .netdev_flow_del = dpif_offload_rte_netdev_flow_del, + .netdev_flow_stats = dpif_offload_rte_netdev_flow_stats, }; diff --git a/lib/dpif-offload-tc.c b/lib/dpif-offload-tc.c index 259e87029..faca54408 100644 --- a/lib/dpif-offload-tc.c +++ b/lib/dpif-offload-tc.c @@ -330,13 +330,6 @@ dpif_offload_tc_netdev_flow_flush_(struct netdev *netdev) return netdev_offload_tc_flow_flush(netdev); } -static int -dpif_offload_tc_netdev_flow_flush(const struct dpif_offload *offload - OVS_UNUSED, struct netdev *netdev) -{ - return dpif_offload_tc_netdev_flow_flush_(netdev); -} - static bool 
dpif_offload_tc_flow_flush_cb(struct dpif_offload_port_mgr_port *port, void *aux) @@ -877,5 +870,4 @@ struct dpif_offload_class dpif_offload_tc_class = { .meter_get = dpif_offload_tc_meter_get, .meter_del = dpif_offload_tc_meter_del, .get_netdev = dpif_offload_tc_get_netdev, - .netdev_flow_flush = dpif_offload_tc_netdev_flow_flush, }; diff --git a/lib/dpif-offload.c b/lib/dpif-offload.c index 9b4700c94..830a04059 100644 --- a/lib/dpif-offload.c +++ b/lib/dpif-offload.c @@ -1421,19 +1421,6 @@ dpif_offload_netdev_same_offload(const struct netdev *a, return offload_a == offload_b; } -int -dpif_offload_netdev_flush_flows(struct netdev *netdev) -{ - const struct dpif_offload *offload; - - offload = ovsrcu_get(const struct dpif_offload *, &netdev->dpif_offload); - - if (offload && offload->class->netdev_flow_flush) { - return offload->class->netdev_flow_flush(offload, netdev); - } - return EOPNOTSUPP; -} - int dpif_offload_datapath_flow_put(const char *dpif_name, struct dpif_offload_flow_put *put, @@ -1816,57 +1803,3 @@ dpif_offload_port_mgr_port_dump_done( free(state); return 0; } - -/* XXX: Temporary functions below, which will be removed once fully - * refactored. */ -struct netdev *dpif_netdev_offload_get_netdev_by_port_id(odp_port_t); -void dpif_netdev_offload_ports_traverse( - bool (*cb)(struct netdev *, odp_port_t, void *), void *aux); - -struct netdev * -dpif_netdev_offload_get_netdev_by_port_id(odp_port_t port_no) -{ - struct dp_offload *dp_offload; - struct dpif dpif; - - ovs_mutex_lock(&dpif_offload_mutex); - dp_offload = shash_find_data(&dpif_offload_providers, "netdev@ovs-netdev"); - ovs_mutex_unlock(&dpif_offload_mutex); - - if (!dp_offload) { - return NULL; - } - - memset(&dpif, 0, sizeof dpif); - ovsrcu_set(&dpif.dp_offload, dp_offload); - - return dpif_offload_get_netdev_by_port_id(&dpif, NULL, port_no); -} - -void -dpif_netdev_offload_ports_traverse( - bool (*cb)(struct netdev *, odp_port_t, void *), void *aux) -{ - struct dpif_offload_port_dump dump; - struct dp_offload *dp_offload; - struct dpif_offload_port port; - struct dpif dpif; - - ovs_mutex_lock(&dpif_offload_mutex); - dp_offload = shash_find_data(&dpif_offload_providers, "netdev@ovs-netdev"); - ovs_mutex_unlock(&dpif_offload_mutex); - - if (!dp_offload) { - return; - } - - memset(&dpif, 0, sizeof dpif); - ovsrcu_set(&dpif.dp_offload, dp_offload); - - DPIF_OFFLOAD_PORT_FOR_EACH (&port, &dump, &dpif) { - if (cb(port.netdev, port.port_no, aux)) { - dpif_offload_port_dump_done(&dump); - break; - } - } -} diff --git a/lib/dpif-offload.h b/lib/dpif-offload.h index 279c7898b..405f867f9 100644 --- a/lib/dpif-offload.h +++ b/lib/dpif-offload.h @@ -154,7 +154,6 @@ int dpif_offload_stats_get(struct dpif *, struct netdev_custom_stats **stats, /* Netdev specific function, which can be used in the fast path. 
*/ bool dpif_offload_netdev_same_offload(const struct netdev *, const struct netdev *); -int dpif_offload_netdev_flush_flows(struct netdev *); int dpif_offload_netdev_hw_miss_packet_recover(struct netdev *, struct dp_packet *); diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index ad99ba2ee..6d987a417 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -26,6 +26,7 @@ #include "cmap.h" #include "dpif-netdev.h" #include "dpif-offload.h" +#include "dpif-offload-rte_flow-private.h" #include "netdev-offload-dpdk.h" #include "netdev-provider.h" #include "netdev-vport.h" @@ -39,13 +40,6 @@ VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600); -/* XXX: Temporarily external declarations, will be removed during cleanup. */ -unsigned int rte_flow_offload_thread_nb(void); -unsigned int rte_flow_offload_thread_id(void); -struct netdev *dpif_netdev_offload_get_netdev_by_port_id(odp_port_t); -void dpif_netdev_offload_ports_traverse( - bool (*cb)(struct netdev *, odp_port_t, void *), void *aux); - /* Thread-safety * ============= * @@ -84,14 +78,14 @@ struct netdev_offload_dpdk_data { }; static int -offload_data_init(struct netdev *netdev) +offload_data_init(struct netdev *netdev, unsigned int offload_thread_count) { struct netdev_offload_dpdk_data *data; data = xzalloc(sizeof *data); ovs_mutex_init(&data->map_lock); cmap_init(&data->ufid_to_rte_flow); - data->rte_flow_counters = xcalloc(rte_flow_offload_thread_nb(), + data->rte_flow_counters = xcalloc(offload_thread_count, sizeof *data->rte_flow_counters); ovsrcu_set(&netdev->hw_info.offload_data, (void *) data); @@ -1154,7 +1148,8 @@ vport_to_rte_tunnel(struct netdev *vport, } static int -add_vport_match(struct flow_patterns *patterns, +add_vport_match(struct dpif_offload_rte_flow *offload, + struct flow_patterns *patterns, odp_port_t orig_in_port, struct netdev *tnldev) { @@ -1165,7 +1160,7 @@ add_vport_match(struct flow_patterns *patterns, struct netdev *physdev; int ret; - physdev = dpif_netdev_offload_get_netdev_by_port_id(orig_in_port); + physdev = dpif_offload_rte_get_netdev(offload, orig_in_port); if (physdev == NULL) { return -1; } @@ -1375,14 +1370,15 @@ parse_gre_match(struct flow_patterns *patterns, } static int OVS_UNUSED -parse_flow_tnl_match(struct netdev *tnldev, +parse_flow_tnl_match(struct dpif_offload_rte_flow *offload, + struct netdev *tnldev, struct flow_patterns *patterns, odp_port_t orig_in_port, struct match *match) { int ret; - ret = add_vport_match(patterns, orig_in_port, tnldev); + ret = add_vport_match(offload, patterns, orig_in_port, tnldev); if (ret) { return ret; } @@ -1398,7 +1394,8 @@ parse_flow_tnl_match(struct netdev *tnldev, } static int -parse_flow_match(struct netdev *netdev, +parse_flow_match(struct dpif_offload_rte_flow *offload OVS_UNUSED, + struct netdev *netdev, odp_port_t orig_in_port OVS_UNUSED, struct flow_patterns *patterns, struct match *match) @@ -1416,7 +1413,7 @@ parse_flow_match(struct netdev *netdev, patterns->physdev = netdev; #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. 
*/ if (netdev_vport_is_vport_class(netdev->netdev_class) && - parse_flow_tnl_match(netdev, patterns, orig_in_port, match)) { + parse_flow_tnl_match(offload, netdev, patterns, orig_in_port, match)) { return -1; } #endif @@ -1838,7 +1835,8 @@ add_represented_port_action(struct flow_actions *actions, } static int -add_output_action(struct netdev *netdev, +add_output_action(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct flow_actions *actions, const struct nlattr *nla) { @@ -1847,7 +1845,7 @@ add_output_action(struct netdev *netdev, int ret = 0; port = nl_attr_get_odp_port(nla); - outdev = dpif_netdev_offload_get_netdev_by_port_id(port); + outdev = dpif_offload_rte_get_netdev(offload, port); if (outdev == NULL) { VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port); return -1; @@ -2129,7 +2127,8 @@ add_tunnel_push_action(struct flow_actions *actions, } static int -parse_clone_actions(struct netdev *netdev, +parse_clone_actions(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct flow_actions *actions, const struct nlattr *clone_actions, const size_t clone_actions_len) @@ -2144,7 +2143,7 @@ parse_clone_actions(struct netdev *netdev, const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca); add_tunnel_push_action(actions, tnl_push); } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) { - if (add_output_action(netdev, actions, ca)) { + if (add_output_action(offload, netdev, actions, ca)) { return -1; } } else if (clone_type == OVS_ACTION_ATTR_PUSH_VLAN) { @@ -2170,7 +2169,8 @@ add_jump_action(struct flow_actions *actions, uint32_t group) } static int OVS_UNUSED -add_tnl_pop_action(struct netdev *netdev, +add_tnl_pop_action(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct flow_actions *actions, const struct nlattr *nla) { @@ -2183,7 +2183,7 @@ add_tnl_pop_action(struct netdev *netdev, int ret; port = nl_attr_get_odp_port(nla); - vport = dpif_netdev_offload_get_netdev_by_port_id(port); + vport = dpif_offload_rte_get_netdev(offload, port); if (vport == NULL) { return -1; } @@ -2213,7 +2213,8 @@ add_tnl_pop_action(struct netdev *netdev, } static int -parse_flow_actions(struct netdev *netdev, +parse_flow_actions(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct flow_actions *actions, struct nlattr *nl_actions, size_t nl_actions_len) @@ -2224,7 +2225,7 @@ parse_flow_actions(struct netdev *netdev, add_count_action(actions); NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) { - if (add_output_action(netdev, actions, nla)) { + if (add_output_action(offload, netdev, actions, nla)) { return -1; } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) { @@ -2254,13 +2255,13 @@ parse_flow_actions(struct netdev *netdev, const struct nlattr *clone_actions = nl_attr_get(nla); size_t clone_actions_len = nl_attr_get_size(nla); - if (parse_clone_actions(netdev, actions, clone_actions, + if (parse_clone_actions(offload, netdev, actions, clone_actions, clone_actions_len)) { return -1; } #ifdef ALLOW_EXPERIMENTAL_API /* Packet restoration API required. 
*/ } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_TUNNEL_POP) { - if (add_tnl_pop_action(netdev, actions, nla)) { + if (add_tnl_pop_action(offload, netdev, actions, nla)) { return -1; } #endif @@ -2280,7 +2281,8 @@ parse_flow_actions(struct netdev *netdev, } static struct rte_flow * -netdev_offload_dpdk_actions(struct netdev *netdev, +netdev_offload_dpdk_actions(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct flow_patterns *patterns, struct nlattr *nl_actions, size_t actions_len) @@ -2295,7 +2297,8 @@ netdev_offload_dpdk_actions(struct netdev *netdev, struct rte_flow_error error; int ret; - ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len); + ret = parse_flow_actions(offload, netdev, &actions, nl_actions, + actions_len); if (ret) { goto out; } @@ -2307,12 +2310,14 @@ out: } static struct ufid_to_rte_flow_data * -netdev_offload_dpdk_add_flow(struct netdev *netdev, +netdev_offload_dpdk_add_flow(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct match *match, struct nlattr *nl_actions, size_t actions_len, const ovs_u128 *ufid, - struct dpif_netdev_offload_info *info) + uint32_t flow_mark, + odp_port_t orig_in_port) { struct flow_patterns patterns = { .items = NULL, @@ -2323,20 +2328,20 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, bool actions_offloaded = true; struct rte_flow *flow; - if (parse_flow_match(netdev, info->orig_in_port, &patterns, match)) { + if (parse_flow_match(offload, netdev, orig_in_port, &patterns, match)) { VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported", netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid)); goto out; } - flow = netdev_offload_dpdk_actions(patterns.physdev, &patterns, nl_actions, - actions_len); + flow = netdev_offload_dpdk_actions(offload, patterns.physdev, &patterns, + nl_actions, actions_len); if (!flow && !netdev_vport_is_vport_class(netdev->netdev_class)) { /* If we failed to offload the rule actions fallback to MARK+RSS * actions. */ flow = netdev_offload_dpdk_mark_rss(&patterns, netdev, - info->flow_mark); + flow_mark); actions_offloaded = false; } @@ -2427,10 +2432,11 @@ get_netdev_odp_cb(struct netdev *netdev, } int -netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, +netdev_offload_dpdk_flow_put(struct dpif_offload_rte_flow *offload, + struct netdev *netdev, struct match *match, struct nlattr *actions, size_t actions_len, - const ovs_u128 *ufid, - struct dpif_netdev_offload_info *info, + const ovs_u128 *ufid, uint32_t flow_mark, + odp_port_t orig_in_port, struct dpif_flow_stats *stats) { struct ufid_to_rte_flow_data *rte_flow_data; @@ -2453,8 +2459,8 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, /* Extract the orig_in_port from physdev as in case of modify the one * provided by upper layer cannot be used. 
*/ - dpif_netdev_offload_ports_traverse(get_netdev_odp_cb, &aux); - info->orig_in_port = aux.odp_port; + dpif_offload_rte_traverse_ports(offload, get_netdev_odp_cb, &aux); + orig_in_port = aux.odp_port; old_stats = rte_flow_data->stats; modification = true; ret = netdev_offload_dpdk_flow_destroy(rte_flow_data); @@ -2463,8 +2469,9 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, } } - rte_flow_data = netdev_offload_dpdk_add_flow(netdev, match, actions, - actions_len, ufid, info); + rte_flow_data = netdev_offload_dpdk_add_flow(offload, netdev, match, + actions, actions_len, ufid, + flow_mark, orig_in_port); if (!rte_flow_data) { return -1; } @@ -2496,7 +2503,8 @@ netdev_offload_dpdk_flow_del(struct netdev *netdev OVS_UNUSED, } int -netdev_offload_dpdk_init(struct netdev *netdev) +netdev_offload_dpdk_init(struct netdev *netdev, + unsigned int offload_thread_count) { int ret = EOPNOTSUPP; @@ -2508,7 +2516,7 @@ netdev_offload_dpdk_init(struct netdev *netdev) } if (netdev_dpdk_flow_api_supported(netdev, false)) { - ret = offload_data_init(netdev); + ret = offload_data_init(netdev, offload_thread_count); } return ret; @@ -2617,12 +2625,13 @@ flush_in_vport_cb(struct netdev *vport, } int -netdev_offload_dpdk_flow_flush(struct netdev *netdev) +netdev_offload_dpdk_flow_flush(struct dpif_offload_rte_flow *offload, + struct netdev *netdev) { flush_netdev_flows_in_related(netdev, netdev); if (!netdev_vport_is_vport_class(netdev->netdev_class)) { - dpif_netdev_offload_ports_traverse(flush_in_vport_cb, netdev); + dpif_offload_rte_traverse_ports(offload, flush_in_vport_cb, netdev); } return 0; @@ -2670,7 +2679,8 @@ out: } static struct netdev * -get_vport_netdev(struct rte_flow_tunnel *tunnel, +get_vport_netdev(struct dpif_offload_rte_flow *offload, + struct rte_flow_tunnel *tunnel, odp_port_t *odp_port) { struct get_vport_netdev_aux aux = { @@ -2685,14 +2695,14 @@ get_vport_netdev(struct rte_flow_tunnel *tunnel, } else if (tunnel->type == RTE_FLOW_ITEM_TYPE_GRE) { aux.type = "gre"; } - dpif_netdev_offload_ports_traverse(get_vport_netdev_cb, &aux); + dpif_offload_rte_traverse_ports(offload, get_vport_netdev_cb, &aux); return aux.vport; } -int -netdev_offload_dpdk_hw_miss_packet_recover(struct netdev *netdev, - struct dp_packet *packet) +int netdev_offload_dpdk_hw_miss_packet_recover( + struct dpif_offload_rte_flow *offload, struct netdev *netdev, + struct dp_packet *packet) { struct rte_flow_restore_info rte_restore_info; struct rte_flow_tunnel *rte_tnl; @@ -2719,7 +2729,7 @@ netdev_offload_dpdk_hw_miss_packet_recover(struct netdev *netdev, } rte_tnl = &rte_restore_info.tunnel; - vport_netdev = get_vport_netdev(rte_tnl, &vport_odp); + vport_netdev = get_vport_netdev(offload, rte_tnl, &vport_odp); if (!vport_netdev) { VLOG_WARN_RL(&rl, "Could not find vport netdev"); return EOPNOTSUPP; @@ -2781,7 +2791,8 @@ close_vport_netdev: } uint64_t -netdev_offload_dpdk_flow_get_n_offloaded(struct netdev *netdev) +netdev_offload_dpdk_flow_get_n_offloaded(struct netdev *netdev, + unsigned int offload_thread_count) { struct netdev_offload_dpdk_data *data; uint64_t total = 0; @@ -2793,9 +2804,24 @@ netdev_offload_dpdk_flow_get_n_offloaded(struct netdev *netdev) return 0; } - for (tid = 0; tid < rte_flow_offload_thread_nb(); tid++) { + for (tid = 0; tid < offload_thread_count; tid++) { total += data->rte_flow_counters[tid]; } return total; } + +uint64_t +netdev_offload_dpdk_flow_get_n_offloaded_by_thread(struct netdev *netdev, + unsigned int tid) +{ + struct netdev_offload_dpdk_data *data; + + 
data = (struct netdev_offload_dpdk_data *) + ovsrcu_get(void *, &netdev->hw_info.offload_data); + if (!data) { + return 0; + } + + return data->rte_flow_counters[tid]; +} diff --git a/lib/netdev-offload-dpdk.h b/lib/netdev-offload-dpdk.h index 3587a3a72..caed01bd1 100644 --- a/lib/netdev-offload-dpdk.h +++ b/lib/netdev-offload-dpdk.h @@ -18,22 +18,28 @@ #define NETDEV_OFFLOAD_DPDK_H /* Forward declarations of private structures. */ +struct dpif_offload_rte_flow; struct netdev; -struct dpif_netdev_offload_info; /* Netdev-specific offload functions. These should only be used by the * associated dpif offload provider. */ -int netdev_offload_dpdk_init(struct netdev *); +int netdev_offload_dpdk_init(struct netdev *, + unsigned int offload_thread_count); void netdev_offload_dpdk_uninit(struct netdev *); -int netdev_offload_dpdk_flow_flush(struct netdev *); -uint64_t netdev_offload_dpdk_flow_get_n_offloaded(struct netdev *); -int netdev_offload_dpdk_hw_miss_packet_recover(struct netdev *, +int netdev_offload_dpdk_flow_flush(struct dpif_offload_rte_flow *, + struct netdev *); +uint64_t netdev_offload_dpdk_flow_get_n_offloaded( + struct netdev *, unsigned int offload_thread_count); +uint64_t netdev_offload_dpdk_flow_get_n_offloaded_by_thread( + struct netdev *, unsigned int tid); +int netdev_offload_dpdk_hw_miss_packet_recover(struct dpif_offload_rte_flow *, + struct netdev *, struct dp_packet *); -#ifdef DPDK_NETDEV -int netdev_offload_dpdk_flow_put(struct netdev *, struct match *, +int netdev_offload_dpdk_flow_put(struct dpif_offload_rte_flow *, + struct netdev *, struct match *, struct nlattr *actions, size_t actions_len, - const ovs_u128 *ufid, - struct dpif_netdev_offload_info *, + const ovs_u128 *ufid, uint32_t flow_mark, + odp_port_t orig_in_port, struct dpif_flow_stats *); int netdev_offload_dpdk_flow_del(struct netdev *, const ovs_u128 *ufid, struct dpif_flow_stats *); @@ -41,38 +47,5 @@ int netdev_offload_dpdk_flow_get(struct netdev *, struct match *, struct nlattr **actions, const ovs_u128 *ufid, struct dpif_flow_stats *, struct dpif_flow_attrs *, struct ofpbuf *buf); -#else -static inline int -netdev_offload_dpdk_flow_put(struct netdev *netdev OVS_UNUSED, - struct match *match OVS_UNUSED, - struct nlattr *actions OVS_UNUSED, - size_t actions_len OVS_UNUSED, - const ovs_u128 *ufid OVS_UNUSED, - struct dpif_netdev_offload_info *info OVS_UNUSED, - struct dpif_flow_stats *stats OVS_UNUSED) -{ - return EOPNOTSUPP; -} - -static inline int -netdev_offload_dpdk_flow_del(struct netdev *netdev OVS_UNUSED, - const ovs_u128 *ufid OVS_UNUSED, - struct dpif_flow_stats *stats OVS_UNUSED) -{ - return EOPNOTSUPP; -} - -static inline int -netdev_offload_dpdk_flow_get(struct netdev *netdev OVS_UNUSED, - struct match *match OVS_UNUSED, - struct nlattr **actions OVS_UNUSED, - const ovs_u128 *ufid OVS_UNUSED, - struct dpif_flow_stats *stats OVS_UNUSED, - struct dpif_flow_attrs *attrs OVS_UNUSED, - struct ofpbuf *buf OVS_UNUSED) -{ - return EOPNOTSUPP; -} -#endif /* #ifdef DPDK_NETDEV */ #endif /* NETDEV_OFFLOAD_DPDK_H */ -- 2.50.1
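
Each rte_flow offload thread in dpif_offload_rte_offload_thread_main() drains its MPSC queue in a loop: while the queue is empty it sleeps, doubling the sleep from 1 ms up to 64 ms, and it re-checks the shutdown flag on every sleep so dpif_offload_rte_close() can join the thread promptly. Below is a minimal standalone sketch of that polling pattern only; it uses plain pthreads and C11 atomics instead of OVS's mpsc_queue and thread helpers, and every name in it is illustrative rather than part of the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define BACKOFF_MIN_MS 1
#define BACKOFF_MAX_MS 64

static atomic_bool work_available;      /* Stand-in for a non-empty queue. */
static atomic_bool shutdown_requested;

static void
sleep_ms(unsigned int ms)
{
    struct timespec ts = { .tv_sec = ms / 1000,
                           .tv_nsec = (long) (ms % 1000) * 1000000 };

    nanosleep(&ts, NULL);
}

static void *
consumer_main(void *arg)
{
    (void) arg;

    while (!atomic_load(&shutdown_requested)) {
        unsigned int backoff_ms = BACKOFF_MIN_MS;

        /* Poll with exponential backoff until work shows up, bailing out
         * promptly if shutdown is requested. */
        while (!atomic_exchange(&work_available, false)) {
            sleep_ms(backoff_ms);
            if (backoff_ms < BACKOFF_MAX_MS) {
                backoff_ms <<= 1;
            }
            if (atomic_load(&shutdown_requested)) {
                return NULL;
            }
        }
        printf("processing one batch of offload requests\n");
    }
    return NULL;
}

int
main(void)
{
    pthread_t consumer;

    pthread_create(&consumer, NULL, consumer_main, NULL);
    sleep_ms(50);
    atomic_store(&work_available, true);     /* Producer enqueues work. */
    sleep_ms(50);
    atomic_store(&shutdown_requested, true); /* Equivalent of close(). */
    pthread_join(consumer, NULL);
    return 0;
}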
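
dpif_offload_rte_flush() is a fan-out: one flush item is queued to every offload thread and the caller blocks on a barrier initialized to offload_thread_count + 1, so the call only returns once every thread has drained the requests queued ahead of the flush. The sketch below models just that rendezvous with POSIX barriers instead of ovs_barrier and mpsc_queue; the worker/token names are illustrative, not the patch's API.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define N_WORKERS 4

static pthread_barrier_t flush_barrier;
static atomic_bool flush_requested[N_WORKERS];
static atomic_bool stop;

static void *
worker_main(void *arg)
{
    int id = *(int *) arg;

    while (!atomic_load(&stop)) {
        if (atomic_exchange(&flush_requested[id], false)) {
            /* Everything queued ahead of the flush token is done by now;
             * meet the caller on the barrier. */
            pthread_barrier_wait(&flush_barrier);
        }
        sched_yield();
    }
    return NULL;
}

int
main(void)
{
    pthread_t threads[N_WORKERS];
    int ids[N_WORKERS];

    pthread_barrier_init(&flush_barrier, NULL, N_WORKERS + 1);
    for (int i = 0; i < N_WORKERS; i++) {
        ids[i] = i;
        pthread_create(&threads[i], NULL, worker_main, &ids[i]);
    }

    /* "Enqueue" one flush token per worker, then wait for all of them. */
    for (int i = 0; i < N_WORKERS; i++) {
        atomic_store(&flush_requested[i], true);
    }
    pthread_barrier_wait(&flush_barrier);
    printf("flush completed on all %d workers\n", N_WORKERS);

    atomic_store(&stop, true);
    for (int i = 0; i < N_WORKERS; i++) {
        pthread_join(threads[i], NULL);
    }
    pthread_barrier_destroy(&flush_barrier);
    return 0;
}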
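
The new dpif_offload_rte_netdev_flow_put() and _flow_del() never complete inline: they copy the request into a queue item, hand back INVALID_FLOW_MARK, and return EINPROGRESS, with the final status reaching the caller later through the callback carried in cb_data. The toy program below models only that contract; the toy_* types and the one-slot "queue" are hypothetical simplifications, not the OVS interface.

#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_INVALID_FLOW_MARK UINT32_MAX

struct toy_flow_put {
    uint64_t ufid;                                           /* Flow id. */
    void (*done)(uint64_t ufid, int status, uint32_t mark);  /* cb_data. */
};

/* One-slot "queue" standing in for the per-thread MPSC queue. */
static struct toy_flow_put pending;
static bool pending_valid;

/* Provider entry point: remember the request and finish it later. */
static int
toy_flow_put(const struct toy_flow_put *put, uint32_t *flow_mark)
{
    pending = *put;
    pending_valid = true;
    *flow_mark = TOY_INVALID_FLOW_MARK;   /* No mark assigned yet. */
    return EINPROGRESS;
}

/* Offload-thread side: do the work and run the completion callback. */
static void
toy_process_pending(void)
{
    if (pending_valid) {
        pending_valid = false;
        pending.done(pending.ufid, 0, 42);   /* Success, mark 42. */
    }
}

static void
completion_cb(uint64_t ufid, int status, uint32_t mark)
{
    printf("flow %" PRIu64 ": status=%d mark=%" PRIu32 "\n",
           ufid, status, mark);
}

int
main(void)
{
    struct toy_flow_put put = { .ufid = 1, .done = completion_cb };
    uint32_t mark;

    if (toy_flow_put(&put, &mark) == EINPROGRESS) {
        /* 'mark' is invalid here; the real result arrives through the
         * callback once the offload thread handles the request. */
        toy_process_pending();
    }
    return 0;
}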
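
dpif_offload_rte_get_global_stats() lays its counters out as ARRAY_SIZE(hwol_stats) * (nb_thread + 1) entries: the first block holds the aggregated totals and block tid + 1 holds thread tid's counters, which is why the per-thread loop starts writing at idx = (tid + 1) * ARRAY_SIZE(hwol_stats). A small sketch of that indexing, with a hypothetical N_COUNTER_TYPES standing in for ARRAY_SIZE(hwol_stats):

#include <stddef.h>
#include <stdio.h>

#define N_COUNTER_TYPES 6   /* Enqueued, inserted and four latency stats. */

/* Block 0 holds the totals; block 'tid' + 1 holds thread 'tid'. */
static size_t
counter_index(unsigned int tid, unsigned int type)
{
    return (size_t) (tid + 1) * N_COUNTER_TYPES + type;
}

int
main(void)
{
    unsigned int nb_thread = 2;

    printf("array size: %u\n", N_COUNTER_TYPES * (nb_thread + 1));    /* 18 */
    printf("thread 1, counter 2 at index %zu\n", counter_index(1, 2)); /* 14 */
    return 0;
}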
