>From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-boun...@openvswitch.org] >On Behalf Of >Ilya Maximets >Sent: Tuesday, May 30, 2017 3:12 PM >To: d...@openvswitch.org; Darrell Ball <db...@vmware.com> >Cc: Heetae Ahn <heetae82....@samsung.com>; Ilya Maximets ><i.maxim...@samsung.com> >Subject: [ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental addition/deletion >of PMD threads. > >Currently, change of 'pmd-cpu-mask' is very heavy operation. >It requires destroying of all the PMD threads and creating >them back. After that, all the threads will sleep until >ports' redistribution finished. > >This patch adds ability to not stop the datapath while >adjusting number/placement of PMD threads. All not affected >threads will forward traffic without any additional latencies. > >id-pool created for static tx queue ids to keep them sequential >in a flexible way. non-PMD thread will always have >static_tx_qid = 0 as it was before. > >Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
Hi Ilya, Patch LGTM - just one comment inline. I conducted some light testing and observed the following: - temporary dip in forwarding rate (~50%) when modifying pmd-cpu-mask (in previous implementation, forwarding stopped completely) - occasional drop to zero in forwarding rate when modifying pmd-cpu-mask (e.g. change from '2' to '12') - temporary drop of ~1mpps when adding/deleting port (similar to existing implementation) I've also conducted the following sanity checks on the patch, and found no issues: - checkpatch.py - compilation with clang - compilation with gcc - sparse - make check (no additional tests fail after application of this patch) Tested-by: Mark Kavanagh <mark.b.kavan...@intel.com> Thanks, Mark >--- > lib/dpif-netdev.c | 143 +++++++++++++++++++++++++++++++++++++++--------------- > tests/pmd.at | 2 +- > 2 files changed, 105 insertions(+), 40 deletions(-) > >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >index b50164b..596d133 100644 >--- a/lib/dpif-netdev.c >+++ b/lib/dpif-netdev.c >@@ -48,6 +48,7 @@ > #include "fat-rwlock.h" > #include "flow.h" > #include "hmapx.h" >+#include "id-pool.h" > #include "latch.h" > #include "netdev.h" > #include "netdev-vport.h" >@@ -277,6 +278,9 @@ struct dp_netdev { > > /* Stores all 'struct dp_netdev_pmd_thread's. */ > struct cmap poll_threads; >+ /* id pool for per thread static_tx_qid. */ >+ struct id_pool *tx_qid_pool; >+ struct ovs_mutex tx_qid_pool_mutex; > > /* Protects the access of the 'struct dp_netdev_pmd_thread' > * instance for non-pmd thread. */ >@@ -562,7 +566,7 @@ struct dp_netdev_pmd_thread { > /* Queue id used by this pmd thread to send packets on all netdevs if > * XPS disabled for this netdev. All static_tx_qid's are unique and less > * than 'cmap_count(dp->poll_threads)'. */ >- const int static_tx_qid; >+ uint32_t static_tx_qid; > > struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. > */ > /* List of rx queues to poll. 
*/ >@@ -642,6 +646,8 @@ static struct dp_netdev_pmd_thread >*dp_netdev_get_pmd(struct dp_netdev >*dp, > unsigned core_id); > static struct dp_netdev_pmd_thread * > dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos); >+static void dp_netdev_del_pmd(struct dp_netdev *dp, >+ struct dp_netdev_pmd_thread *pmd); > static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd); > static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd); > static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd, >@@ -1181,10 +1187,17 @@ create_dp_netdev(const char *name, const struct >dpif_class *class, > atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > > cmap_init(&dp->poll_threads); >+ >+ ovs_mutex_init(&dp->tx_qid_pool_mutex); >+ /* We need 1 Tx queue for each possible core + 1 for non-PMD threads. */ >+ dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1); >+ > ovs_mutex_init_recursive(&dp->non_pmd_mutex); > ovsthread_key_create(&dp->per_pmd_key, NULL); > > ovs_mutex_lock(&dp->port_mutex); >+ /* non-PMD will be created before all other threads and will >+ * allocate static_tx_qid = 0. 
*/ > dp_netdev_set_nonpmd(dp); > > error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class, >@@ -1279,6 +1292,9 @@ dp_netdev_free(struct dp_netdev *dp) > dp_netdev_destroy_all_pmds(dp, true); > cmap_destroy(&dp->poll_threads); > >+ ovs_mutex_destroy(&dp->tx_qid_pool_mutex); >+ id_pool_destroy(dp->tx_qid_pool); >+ > ovs_mutex_destroy(&dp->non_pmd_mutex); > ovsthread_key_delete(dp->per_pmd_key); > >@@ -3302,12 +3318,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) >OVS_REQUIRES(dp- >>port_mutex) > } > > static void >+reload_affected_pmds(struct dp_netdev *dp) >+{ >+ struct dp_netdev_pmd_thread *pmd; >+ >+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >+ if (pmd->need_reload) { >+ dp_netdev_reload_pmd__(pmd); >+ pmd->need_reload = false; >+ } >+ } >+} >+ >+static void > reconfigure_pmd_threads(struct dp_netdev *dp) > OVS_REQUIRES(dp->port_mutex) > { > struct dp_netdev_pmd_thread *pmd; > struct ovs_numa_dump *pmd_cores; >+ struct ovs_numa_info_core *core; >+ struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete); >+ struct hmapx_node *node; > bool changed = false; >+ bool need_to_adjust_static_tx_qids = false; > > /* The pmd threads should be started only if there's a pmd port in the > * datapath. If the user didn't provide any "pmd-cpu-mask", we start >@@ -3320,40 +3353,61 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS); > } > >- /* Check for changed configuration */ >- if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) { >- changed = true; >- } else { >- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >- if (pmd->core_id != NON_PMD_CORE_ID >- && !ovs_numa_dump_contains_core(pmd_cores, >- pmd->numa_id, >- pmd->core_id)) { >- changed = true; >- break; >- } >+ /* We need to adjust 'static_tx_qid's only if we're reducing number of >+ * PMD threads. Otherwise, new threads will allocate all the freed ids. 
*/ Can you elaborate on why the static_tx_qids need to be adjusted - it's still not clear to me from the comment. >+ if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) { >+ need_to_adjust_static_tx_qids = true; >+ } >+ >+ /* Check for unwanted pmd threads */ >+ CMAP_FOR_EACH(pmd, node, &dp->poll_threads) { >+ if (pmd->core_id == NON_PMD_CORE_ID) { >+ continue; >+ } >+ if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id, >+ pmd->core_id)) { >+ hmapx_add(&to_delete, pmd); >+ } else if (need_to_adjust_static_tx_qids) { >+ pmd->need_reload = true; > } > } > >- /* Destroy the old and recreate the new pmd threads. We don't perform an >- * incremental update because we would have to adjust 'static_tx_qid'. */ >- if (changed) { >- struct ovs_numa_info_core *core; >- struct ovs_numa_info_numa *numa; >+ HMAPX_FOR_EACH (node, &to_delete) { >+ pmd = (struct dp_netdev_pmd_thread *) node->data; >+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.", >+ pmd->numa_id, pmd->core_id); >+ dp_netdev_del_pmd(dp, pmd); >+ } >+ changed = !hmapx_is_empty(&to_delete); >+ hmapx_destroy(&to_delete); > >- /* Do not destroy the non pmd thread. */ >- dp_netdev_destroy_all_pmds(dp, false); >- FOR_EACH_CORE_ON_DUMP (core, pmd_cores) { >- struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >+ if (need_to_adjust_static_tx_qids) { >+ /* 'static_tx_qid's are not sequential now. >+ * Reload remaining threads to fix this. 
*/ >+ reload_affected_pmds(dp); >+ } > >+ /* Check for required new pmd threads */ >+ FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { >+ pmd = dp_netdev_get_pmd(dp, core->core_id); >+ if (!pmd) { >+ pmd = xzalloc(sizeof *pmd); > dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id); >- > pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.", >+ pmd->numa_id, pmd->core_id); >+ changed = true; >+ } else { >+ dp_netdev_pmd_unref(pmd); > } >+ } >+ >+ if (changed) { >+ struct ovs_numa_info_numa *numa; > > /* Log the number of pmd threads per numa node. */ > FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) { >- VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d", >+ VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d", > numa->n_cores, numa->numa_id); > } > } >@@ -3362,19 +3416,6 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > } > > static void >-reload_affected_pmds(struct dp_netdev *dp) >-{ >- struct dp_netdev_pmd_thread *pmd; >- >- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >- if (pmd->need_reload) { >- dp_netdev_reload_pmd__(pmd); >- pmd->need_reload = false; >- } >- } >-} >- >-static void > pmd_remove_stale_ports(struct dp_netdev *dp, > struct dp_netdev_pmd_thread *pmd) > OVS_EXCLUDED(pmd->port_mutex) >@@ -3673,6 +3714,28 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd) > } > } > >+static void >+pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd) >+{ >+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex); >+ if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) { >+ VLOG_ABORT("static_tx_qid allocation failed for PMD on core %2d" >+ ", numa_id %d.", pmd->core_id, pmd->numa_id); >+ } >+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex); >+ >+ VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d" >+ ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, pmd->numa_id); >+} >+ >+static void >+pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd) >+{ >+ 
ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex); >+ id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid); >+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex); >+} >+ > static int > pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd, > struct polled_queue **ppoll_list) >@@ -3718,6 +3781,7 @@ pmd_thread_main(void *f_) > dpdk_set_lcore_id(pmd->core_id); > poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); > reload: >+ pmd_alloc_static_tx_qid(pmd); > emc_cache_init(&pmd->flow_cache); > > /* List port/core affinity */ >@@ -3766,6 +3830,7 @@ reload: > dp_netdev_pmd_reload_done(pmd); > > emc_cache_uninit(&pmd->flow_cache); >+ pmd_free_static_tx_qid(pmd); > > if (!exiting) { > goto reload; >@@ -4176,8 +4241,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread >*pmd, struct >dp_netdev *dp, > pmd->numa_id = numa_id; > pmd->need_reload = false; > >- *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads); >- > ovs_refcount_init(&pmd->ref_cnt); > latch_init(&pmd->exit_latch); > pmd->reload_seq = seq_create(); >@@ -4198,6 +4261,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread >*pmd, struct >dp_netdev *dp, > * actual thread created for NON_PMD_CORE_ID. */ > if (core_id == NON_PMD_CORE_ID) { > emc_cache_init(&pmd->flow_cache); >+ pmd_alloc_static_tx_qid(pmd); > } > cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node), > hash_int(core_id, 0)); >@@ -4240,6 +4304,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct >dp_netdev_pmd_thread >*pmd) > ovs_mutex_lock(&dp->non_pmd_mutex); > emc_cache_uninit(&pmd->flow_cache); > pmd_free_cached_ports(pmd); >+ pmd_free_static_tx_qid(pmd); > ovs_mutex_unlock(&dp->non_pmd_mutex); > } else { > latch_set(&pmd->exit_latch); >diff --git a/tests/pmd.at b/tests/pmd.at >index 05755b3..39a3645 100644 >--- a/tests/pmd.at >+++ b/tests/pmd.at >@@ -38,7 +38,7 @@ dnl > dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 not > dnl passed. 
Checking starts from line number 'line' in ovs-vswithd.log . > m4_define([CHECK_PMD_THREADS_CREATED], [ >- PATTERN="Created [[0-9]]* pmd threads on numa node $2" >+ PATTERN="There are [[0-9]]* pmd threads on numa node $2" > line_st=$3 > if [[ -z "$line_st" ]] > then >-- >2.7.4 > >_______________________________________________ >dev mailing list >d...@openvswitch.org >https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev