On 7/6/17, 10:50 PM, "Ilya Maximets" <i.maxim...@samsung.com> wrote:
On 07.07.2017 07:33, Darrell Ball wrote: > > > On 6/15/17, 4:36 AM, "Ilya Maximets" <i.maxim...@samsung.com> wrote: > > Currently, change of 'pmd-cpu-mask' is very heavy operation. > It requires destroying of all the PMD threads and creating > them back. After that, all the threads will sleep until > ports' redistribution finished. > > This patch adds ability to not stop the datapath while > adjusting number/placement of PMD threads. All not affected > threads will forward traffic without any additional latencies. > > id-pool created for static tx queue ids to keep them sequential > in a flexible way. non-PMD thread will always have > static_tx_qid = 0 as it was before. > > Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> > Tested-by: Mark Kavanagh <mark.b.kavan...@intel.com> > Acked-by: Mark Kavanagh <mark.b.kavan...@intel.com> > --- > lib/dpif-netdev.c | 146 +++++++++++++++++++++++++++++++++++++++--------------- > tests/pmd.at | 2 +- > 2 files changed, 108 insertions(+), 40 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 2f224db..c0bcca0 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -48,6 +48,7 @@ > #include "fat-rwlock.h" > #include "flow.h" > #include "hmapx.h" > +#include "id-pool.h" > #include "latch.h" > #include "netdev.h" > #include "netdev-vport.h" > @@ -278,6 +279,9 @@ struct dp_netdev { > > /* Stores all 'struct dp_netdev_pmd_thread's. */ > struct cmap poll_threads; > + /* id pool for per thread static_tx_qid. */ > + struct id_pool *tx_qid_pool; > + struct ovs_mutex tx_qid_pool_mutex; > > /* Protects the access of the 'struct dp_netdev_pmd_thread' > * instance for non-pmd thread. */ > @@ -563,7 +567,7 @@ struct dp_netdev_pmd_thread { > /* Queue id used by this pmd thread to send packets on all netdevs if > * XPS disabled for this netdev. All static_tx_qid's are unique and less > * than 'cmap_count(dp->poll_threads)'. */ > - const int static_tx_qid; > + uint32_t static_tx_qid; > > struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. */ > /* List of rx queues to poll. */ > @@ -643,6 +647,8 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp, > unsigned core_id); > static struct dp_netdev_pmd_thread * > dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos); > +static void dp_netdev_del_pmd(struct dp_netdev *dp, > + struct dp_netdev_pmd_thread *pmd); > static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd); > static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd); > static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd, > @@ -1182,10 +1188,17 @@ create_dp_netdev(const char *name, const struct dpif_class *class, > atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > > cmap_init(&dp->poll_threads); > + > + ovs_mutex_init(&dp->tx_qid_pool_mutex); > + /* We need 1 Tx queue for each possible core + 1 for non-PMD threads. */ > + dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1); > + > ovs_mutex_init_recursive(&dp->non_pmd_mutex); > ovsthread_key_create(&dp->per_pmd_key, NULL); > > ovs_mutex_lock(&dp->port_mutex); > + /* non-PMD will be created before all other threads and will > + * allocate static_tx_qid = 0. */ > dp_netdev_set_nonpmd(dp); > > error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class, > @@ -1280,6 +1293,9 @@ dp_netdev_free(struct dp_netdev *dp) > dp_netdev_destroy_all_pmds(dp, true); > cmap_destroy(&dp->poll_threads); > > + ovs_mutex_destroy(&dp->tx_qid_pool_mutex); > + id_pool_destroy(dp->tx_qid_pool); > + > ovs_mutex_destroy(&dp->non_pmd_mutex); > ovsthread_key_delete(dp->per_pmd_key); > > @@ -3296,12 +3312,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex) > } > > static void > +reload_affected_pmds(struct dp_netdev *dp) > +{ > + struct dp_netdev_pmd_thread *pmd; > + > + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > + if (pmd->need_reload) { > + dp_netdev_reload_pmd__(pmd); > + pmd->need_reload = false; > + } > + } > +} > + > +static void > reconfigure_pmd_threads(struct dp_netdev *dp) > OVS_REQUIRES(dp->port_mutex) > { > struct dp_netdev_pmd_thread *pmd; > struct ovs_numa_dump *pmd_cores; > + struct ovs_numa_info_core *core; > + struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete); > + struct hmapx_node *node; > bool changed = false; > + bool need_to_adjust_static_tx_qids = false; > > /* The pmd threads should be started only if there's a pmd port in the > * datapath. If the user didn't provide any "pmd-cpu-mask", we start > @@ -3314,40 +3347,64 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS); > } > > - /* Check for changed configuration */ > - if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) { > - changed = true; > - } else { > - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > - if (pmd->core_id != NON_PMD_CORE_ID > - && !ovs_numa_dump_contains_core(pmd_cores, > - pmd->numa_id, > - pmd->core_id)) { > - changed = true; > - break; > - } > + /* We need to adjust 'static_tx_qid's only if we're reducing number of > + * PMD threads. > > I don’t understand the placement of the above comment before this selection statement. > If PMD threads are greater than the number of cores, we flag the need to adjust tx qids > Can you explain the above comment ? Number of cores in 'pmd_cores' is the number of threads we will have after reconfiguration. If current number of threads greater than number of threads we'll have in the end (i.e. we are reducing number of pmd htreads), then we need to adjust static txq ids. ok, I had noted the pmd_cores local variable name before and forgot about it. I had looked at the below ‘if’ check and could not make sense of it with my assumed definition of pmd_cores; I should have checked the surrounding code. > + if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) { Since Mark K. tested it, it looks good to go then. > > Otherwise, new threads will allocate all the freed ids. */ > + if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) { > + /* Adjustment is required to keep 'static_tx_qid's sequential and > + * avoid possible issues, for example, imbalanced tx queue usage > + * and unnecessary locking caused by remapping on netdev level. */ > + need_to_adjust_static_tx_qids = true; > + } > + > + /* Check for unwanted pmd threads */ > + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > + if (pmd->core_id == NON_PMD_CORE_ID) { > + continue; > + } > + if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id, > + pmd->core_id)) { > + hmapx_add(&to_delete, pmd); > + } else if (need_to_adjust_static_tx_qids) { > + pmd->need_reload = true; > } > } > > - /* Destroy the old and recreate the new pmd threads. We don't perform an > - * incremental update because we would have to adjust 'static_tx_qid'. */ > - if (changed) { > - struct ovs_numa_info_core *core; > - struct ovs_numa_info_numa *numa; > + HMAPX_FOR_EACH (node, &to_delete) { > + pmd = (struct dp_netdev_pmd_thread *) node->data; > + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.", > + pmd->numa_id, pmd->core_id); > + dp_netdev_del_pmd(dp, pmd); > + } > + changed = !hmapx_is_empty(&to_delete); > + hmapx_destroy(&to_delete); > > - /* Do not destroy the non pmd thread. */ > - dp_netdev_destroy_all_pmds(dp, false); > - FOR_EACH_CORE_ON_DUMP (core, pmd_cores) { > - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); > + if (need_to_adjust_static_tx_qids) { > + /* 'static_tx_qid's are not sequential now. > + * Reload remaining threads to fix this. */ > + reload_affected_pmds(dp); > + } > > + /* Check for required new pmd threads */ > + FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { > + pmd = dp_netdev_get_pmd(dp, core->core_id); > + if (!pmd) { > + pmd = xzalloc(sizeof *pmd); > dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id); > - > pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); > + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.", > + pmd->numa_id, pmd->core_id); > + changed = true; > + } else { > + dp_netdev_pmd_unref(pmd); > } > + } > + > + if (changed) { > + struct ovs_numa_info_numa *numa; > > /* Log the number of pmd threads per numa node. */ > FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) { > - VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d", > + VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d", > numa->n_cores, numa->numa_id); > } > } > @@ -3356,19 +3413,6 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > } > > static void > -reload_affected_pmds(struct dp_netdev *dp) > -{ > - struct dp_netdev_pmd_thread *pmd; > - > - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > - if (pmd->need_reload) { > - dp_netdev_reload_pmd__(pmd); > - pmd->need_reload = false; > - } > - } > -} > - > -static void > pmd_remove_stale_ports(struct dp_netdev *dp, > struct dp_netdev_pmd_thread *pmd) > OVS_EXCLUDED(pmd->port_mutex) > @@ -3668,6 +3712,28 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd) > } > } > > +static void > +pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd) > +{ > + ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex); > + if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) { > + VLOG_ABORT("static_tx_qid allocation failed for PMD on core %2d" > + ", numa_id %d.", pmd->core_id, pmd->numa_id); > + } > + ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex); > + > + VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d" > + ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, pmd->numa_id); > +} > + > +static void > +pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd) > +{ > + ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex); > + id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid); > + ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex); > +} > + > static int > pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd, > struct polled_queue **ppoll_list) > @@ -3713,6 +3779,7 @@ pmd_thread_main(void *f_) > dpdk_set_lcore_id(pmd->core_id); > poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); > reload: > + pmd_alloc_static_tx_qid(pmd); > emc_cache_init(&pmd->flow_cache); > > /* List port/core affinity */ > @@ -3761,6 +3828,7 @@ reload: > dp_netdev_pmd_reload_done(pmd); > > emc_cache_uninit(&pmd->flow_cache); > + pmd_free_static_tx_qid(pmd); > > if (!exiting) { > goto reload; > @@ -4171,8 +4239,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, > pmd->numa_id = numa_id; > pmd->need_reload = false; > > - *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads); > - > ovs_refcount_init(&pmd->ref_cnt); > latch_init(&pmd->exit_latch); > pmd->reload_seq = seq_create(); > @@ -4193,6 +4259,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, > * actual thread created for NON_PMD_CORE_ID. */ > if (core_id == NON_PMD_CORE_ID) { > emc_cache_init(&pmd->flow_cache); > + pmd_alloc_static_tx_qid(pmd); > } > cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node), > hash_int(core_id, 0)); > @@ -4235,6 +4302,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd) > ovs_mutex_lock(&dp->non_pmd_mutex); > emc_cache_uninit(&pmd->flow_cache); > pmd_free_cached_ports(pmd); > + pmd_free_static_tx_qid(pmd); > ovs_mutex_unlock(&dp->non_pmd_mutex); > } else { > latch_set(&pmd->exit_latch); > diff --git a/tests/pmd.at b/tests/pmd.at > index 05755b3..39a3645 100644 > --- a/tests/pmd.at > +++ b/tests/pmd.at > @@ -38,7 +38,7 @@ dnl > dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 not > dnl passed. Checking starts from line number 'line' in ovs-vswithd.log . > m4_define([CHECK_PMD_THREADS_CREATED], [ > - PATTERN="Created [[0-9]]* pmd threads on numa node $2" > + PATTERN="There are [[0-9]]* pmd threads on numa node $2" > line_st=$3 > if [[ -z "$line_st" ]] > then > -- > 2.7.4 > > > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev