Previous diff was completely wrong. Sorry. @@ -2971,6 +2970,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id) * pmd threads for the numa node. */ if (!n_pmds) { int can_have, n_unpinned, i; + struct dp_netdev_pmd_thread **pmds; n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); if (!n_unpinned) { @@ -2982,15 +2982,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id) /* If cpu mask is specified, uses all unpinned cores, otherwise * tries creating NR_PMD_THREADS pmd threads. */ can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS); + pmds = xzalloc(can_have * sizeof(struct dp_netdev_pmd_thread*)); for (i = 0; i < can_have; i++) { - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id); - - dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); + pmds[i] = xzalloc(sizeof(struct dp_netdev_pmd_thread)); + dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id); + } + /* The pmd thread code needs to see all the others configured pmd + * threads on the same numa node. That's why we call + * 'dp_netdev_configure_pmd()' on all the threads and then we actually + * start them. */ + for (i = 0; i < can_have; i++) { /* Each thread will distribute all devices rx-queues among * themselves. */ - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); + pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, pmds[i]); } + free(pmds); VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id); } }
On 27.07.2015 11:17, Ilya Maximets wrote: > Sorry, > without dp_netdev_reload_pmds(dp) at the end. > > On 27.07.2015 10:58, Ilya Maximets wrote: >> Yes, I think, this way is better. But it may be more simple >> if we just keep all the dp_netdev_pmd_thread structures. >> There is no need to search over all pmd threads on system. >> >> Like this: >> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index 79c4612..8e4c025 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -2961,6 +2960,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int >> numa_id) >> * pmd threads for the numa node. */ >> if (!n_pmds) { >> int can_have, n_unpinned, i; >> + struct dp_netdev_pmd_thread *pmd; >> >> n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); >> if (!n_unpinned) { >> @@ -2972,16 +2972,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int >> numa_id) >> /* If cpu mask is specified, uses all unpinned cores, otherwise >> * tries creating NR_PMD_THREADS pmd threads. */ >> can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >> NR_PMD_THREADS); >> + pmd = xzalloc(can_have * sizeof(*pmd)); >> for (i = 0; i < can_have; i++) { >> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >> unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id); >> - >> - dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >> + dp_netdev_configure_pmd(&pmd[i], dp, i, core_id, numa_id); >> + } >> + /* The pmd thread code needs to see all the others configured pmd >> + * threads on the same numa node. That's why we call >> + * 'dp_netdev_configure_pmd()' on all the threads and then we >> actually >> + * start them. */ >> + for (i = 0; i < can_have; i++) { >> /* Each thread will distribute all devices rx-queues among >> * themselves. */ >> - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >> + pmd[i].thread = ovs_thread_create("pmd", pmd_thread_main, >> &pmd[i]); >> } >> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >> numa_id); >> + dp_netdev_reload_pmds(dp); >> } >> } >> >> >> On 24.07.2015 19:42, Daniele Di Proietto wrote: >>> That's a bad race condition, thanks for reporting it! >>> >>> Regarding the fix, I agree that reloading the threads would >>> restore the correct mapping, but it would still allow >>> the threads to run with the incorrect mapping for a brief >>> interval. >>> >>> How about postponing the actual threads creation until all >>> the pmds are configured? >>> >>> Something like: >>> >>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>> index 79c4612..26d9f1f 100644 >>> --- a/lib/dpif-netdev.c >>> +++ b/lib/dpif-netdev.c >>> @@ -2961,6 +2961,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int >>> numa_id) >>> * pmd threads for the numa node. */ >>> if (!n_pmds) { >>> int can_have, n_unpinned, i; >>> + struct dp_netdev_pmd_thread *pmd; >>> >>> n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); >>> if (!n_unpinned) { >>> @@ -2973,13 +2974,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >>> int numa_id) >>> * tries creating NR_PMD_THREADS pmd threads. */ >>> can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, >>> NR_PMD_THREADS); >>> for (i = 0; i < can_have; i++) { >>> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); >>> unsigned core_id = >>> ovs_numa_get_unpinned_core_on_numa(numa_id); >>> >>> + pmd = xzalloc(sizeof *pmd); >>> dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); >>> - /* Each thread will distribute all devices rx-queues among >>> - * themselves. */ >>> - pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >>> + } >>> + >>> + /* The pmd thread code needs to see all the others configured pmd >>> + * threads on the same numa node. That's why we call >>> + * 'dp_netdev_configure_pmd()' on all the threads and then we >>> actually >>> + * start them. */ >>> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { >>> + if (pmd->numa_id == numa_id) { >>> + /* Each thread will distribute all devices rx-queues among >>> + * themselves. */ >>> + pmd->thread = ovs_thread_create("pmd", pmd_thread_main, >>> pmd); >>> + } >>> } >>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>> numa_id); >>> } >>> >>> >>> What do you think? >>> >>> Thanks! >>> >>> On 24/07/2015 12:18, "Ilya Maximets" <i.maxim...@samsung.com> wrote: >>> >>>> Currently pmd threads select queues in pmd_load_queues() according to >>>> get_n_pmd_threads_on_numa(). This behavior leads to race between pmds, >>>> beacause dp_netdev_set_pmds_on_numa() starts them one by one and >>>> current number of threads changes incrementally. >>>> >>>> As a result we may have the following situation with 2 pmd threads: >>>> >>>> * dp_netdev_set_pmds_on_numa() >>>> * pmd12 thread started. Currently only 1 pmd thread exists. >>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_1' >>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_2' >>>> * pmd14 thread started. 2 pmd threads exists. >>>> dpif_netdev|INFO|Created 2 pmd threads on numa node 0 >>>> dpif_netdev(pmd14)|INFO|Core 2 processing port 'port_2' >>>> >>>> We have: >>>> core 1 --> port 1, port 2 >>>> core 2 --> port 2 >>>> >>>> Fix this by reloading all pmds to get right port mapping. >>>> >>>> If we reload pmds, we'll have: >>>> core 1 --> port 1 >>>> core 2 --> port 2 >>>> >>>> Cc: Dyasly Sergey <s.dya...@samsung.com> >>>> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> >>>> --- >>>> lib/dpif-netdev.c | 6 +++--- >>>> 1 file changed, 3 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>>> index 2958d52..fd700f9 100644 >>>> --- a/lib/dpif-netdev.c >>>> +++ b/lib/dpif-netdev.c >>>> @@ -1127,10 +1127,9 @@ do_add_port(struct dp_netdev *dp, const char >>>> *devname, const char *type, >>>> ovs_refcount_init(&port->ref_cnt); >>>> cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); >>>> >>>> - if (netdev_is_pmd(netdev)) { >>>> + if (netdev_is_pmd(netdev)) >>>> dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev)); >>>> - dp_netdev_reload_pmds(dp); >>>> - } >>>> + >>>> seq_change(dp->port_seq); >>>> >>>> return 0; >>>> @@ -2978,6 +2977,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, >>>> int numa_id) >>>> pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); >>>> } >>>> VLOG_INFO("Created %d pmd threads on numa node %d", can_have, >>>> numa_id); >>>> + dp_netdev_reload_pmds(dp); >>>> } >>>> } >>>> >>>> -- >>>> 2.1.4 >>>> >>>> _______________________________________________ >>>> dev mailing list >>>> dev@openvswitch.org >>>> https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >>>> n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Sm >>>> B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=YcYFmCgJtVWAOpEYl7OaM4nqi7maE7 >>>> XOiMnLOpnugHY&s=Zf_P-5VtI2yJNO-f1ZXolxD5syuhr7WdbyNvdwH3zQM&e= >>> >>> _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev