On Fri, Dec 13, 2019 at 6:34 PM Ilya Maximets <[email protected]> wrote: > > @@ -94,6 +99,38 @@ args_contains(const struct svec *args, const char *value) > > return false; > > } > > > > +static void > > +construct_dpdk_lcore_option(const struct smap *ovs_other_config, > > + struct svec *args) > > +{ > > + const char *cmask = smap_get(ovs_other_config, "dpdk-lcore-mask"); > > + struct svec lcores = SVEC_EMPTY_INITIALIZER; > > + struct ovs_numa_info_core *core; > > + struct ovs_numa_dump *cores; > > + int index = 0; > > + > > + if (!cmask) { > > + return; > > + } > > + if (args_contains(args, "-c") || args_contains(args, "-l") || > > + args_contains(args, "--lcores")) { > > + VLOG_WARN("Ignoring database defined option > > 'dpdk-lcore-mask' " > > + "due to dpdk-extra config"); > > + return; > > + } > > + > > + cores = ovs_numa_dump_cores_with_cmask(cmask); > > + FOR_EACH_CORE_ON_DUMP(core, cores) { > > + svec_add_nocopy(&lcores, xasprintf("%d@%d", index, core->core_id)); > > What's the point of re-mapping these cores? > Just let DPDK to fail initialization and user will adjust cores. > This automatic re-mapping only complicates everything.
Deal with existing user of this parameter. Example: https://bugzilla.redhat.com/show_bug.cgi?id=1753432 > > IMHO, user should not configure DPDK cores at all. So, it's users' problem > if cores configured incorrectly. If the user explicitly had set a dpdk option, ok, this is his problem. But here, this is an OVS option, handling this internally makes more sense to me. > > + index++; > > + } > > + svec_terminate(&lcores); > > + ovs_numa_dump_destroy(cores); > > + svec_add(args, "--lcores"); > > + svec_add_nocopy(args, svec_join(&lcores, ",", "")); > > + svec_destroy(&lcores); > > +} > > + > > static void > > construct_dpdk_options(const struct smap *ovs_other_config, struct svec > > *args) > > { > > @@ -103,7 +140,6 @@ construct_dpdk_options(const struct smap > > *ovs_other_config, struct svec *args) > > bool default_enabled; > > const char *default_value; > > } opts[] = { > > - {"dpdk-lcore-mask", "-c", false, NULL}, > > {"dpdk-hugepage-dir", "--huge-dir", false, NULL}, > > {"dpdk-socket-limit", "--socket-limit", false, NULL}, > > }; > > @@ -224,6 +260,7 @@ construct_dpdk_args(const struct smap > > *ovs_other_config, struct svec *args) > > svec_parse_words(args, extra_configuration); > > } > > > > + construct_dpdk_lcore_option(ovs_other_config, args); > > construct_dpdk_options(ovs_other_config, args); > > construct_dpdk_mutex_options(ovs_other_config, args); > > } > > @@ -264,6 +301,58 @@ static cookie_io_functions_t dpdk_log_func = { > > .write = dpdk_log_write, > > }; > > > > +static void > > +dpdk_dump_lcore(struct ds *ds, unsigned lcore) > > +{ > > + struct svec cores = SVEC_EMPTY_INITIALIZER; > > + rte_cpuset_t cpuset; > > + unsigned cpu; > > + > > + cpuset = rte_lcore_cpuset(lcore); > > + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { > > + if (!CPU_ISSET(cpu, &cpuset)) { > > + continue; > > + } > > + svec_add_nocopy(&cores, xasprintf("%u", cpu)); > > + }> + svec_terminate(&cores); > > + ds_put_format(ds, "lcore%u (%s) is running on core %s\n", lcore, > > + rte_eal_lcore_role(lcore) != ROLE_OFF ? "DPDK" : "OVS", > > + svec_join(&cores, ",", "")); > > + svec_destroy(&cores); > > +} > > + > > +static void > > +dpdk_dump_lcores(struct unixctl_conn *conn, int argc, const char *argv[], > > + void *aux OVS_UNUSED) > > +{ > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + unsigned lcore; > > + > > + ovs_mutex_lock(&lcore_bitmap_mutex); > > + if (lcore_bitmap == NULL) { > > + unixctl_command_reply_error(conn, "DPDK has not been initialised"); > > + goto out; > > + } > > + if (argc > 1) { > > + if (!str_to_uint(argv[1], 0, &lcore) || lcore >= RTE_MAX_LCORE || > > + !bitmap_is_set(lcore_bitmap, lcore)) { > > + unixctl_command_reply_error(conn, "incorrect lcoreid"); > > + goto out; > > + } > > + dpdk_dump_lcore(&ds, lcore); > > + } else for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { > > Not a good style to have 'else for'. Tried to be creative :-) (the additional indent is really unnecessary). But, if this is a blocker for this patch, ok. > > DPDK lcore iterator? We are iterating over lcores owned by DPDK and OVS. Example: other_config : {dpdk-init="true", dpdk-lcore-mask="0x1", pmd-cpu-mask="0x00008002"} # ovs-appctl dpdk/dump-lcores lcore0 (DPDK) is running on core 0 lcore1 (OVS) is running on core 1 lcore2 (OVS) is running on core 15 > > > + if (!bitmap_is_set(lcore_bitmap, lcore)) { > > + continue; > > + } > > + dpdk_dump_lcore(&ds, lcore); > > + } > > + unixctl_command_reply(conn, ds_cstr(&ds)); > > + ds_destroy(&ds); > > +out: > > + ovs_mutex_unlock(&lcore_bitmap_mutex); > > +} > > + > > static bool > > dpdk_init__(const struct smap *ovs_other_config) > > { > > @@ -428,6 +518,24 @@ dpdk_init__(const struct smap *ovs_other_config) > > } > > } > > > > + ovs_mutex_lock(&lcore_bitmap_mutex); > > + lcore_bitmap = bitmap_allocate(RTE_MAX_LCORE); > > + /* Mark DPDK threads. */ > > + for (uint32_t lcore = 0; lcore < RTE_MAX_LCORE; lcore++) { > > Shouldn't we use DPDK lcore iterator instead? The DPDK lcore iterator skips "service" cores. We would end up reusing those lcores if the user had set something exotic. Example: other_config : {dpdk-extra="-c 0x5 -s 0x4", dpdk-init="true", pmd-cpu-mask="0x00008002"} # ovs-appctl dpdk/dump-lcores lcore0 (DPDK) is running on core 0 lcore1 (OVS) is running on core 1 lcore2 (DPDK) is running on core 2 lcore3 (OVS) is running on core 15 > > > + struct ds ds = DS_EMPTY_INITIALIZER; > > + > > + if (rte_eal_lcore_role(lcore) == ROLE_OFF) { > > + continue; > > + } > > + bitmap_set1(lcore_bitmap, lcore); > > + dpdk_dump_lcore(&ds, lcore); > > + VLOG_INFO("%s", ds_cstr(&ds)); > > + ds_destroy(&ds); > > + } > > + unixctl_command_register("dpdk/dump-lcores", "[lcore]", 0, 1, > > + dpdk_dump_lcores, NULL); > > We might need this command if we're doing automatic re-mapping > of lcores from the EAL arguments, but if not, I don't think we need it. > Simple logging on PMD start should be enough. Flavio mentioned that the logs could have been rotated. > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > index 7ebf4ef87..91e98919c 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -5470,7 +5470,7 @@ pmd_thread_main(void *f_) > > /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */ > > ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); > > ovs_numa_thread_setaffinity_core(pmd->core_id); > > - dpdk_set_lcore_id(pmd->core_id); > > + dpdk_init_thread_context(pmd->core_id); > > Since this function will atually change cpu affinity that is not This is the same affinity than what OVS just applied. The call to pthread_setaffinity_np in dpdk should be a noop. > expected by OVS it must be called before setting affinity by > ovs_numa_thread_setaffinity_core(). This way OVS will guarantee > correct affinity for its threads. ovs_numa_thread_setaffinity_core() return value is not checked. And I can't see what calling this function changes to the ovs-numa internals. We could even remove it and only rely on dpdk. -- David Marchand _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
