Re: [ovs-dev] [PATCH v7 05/16] dpif-netdev: Fix race condition in pmd thread initialization.
On 19/04/2016 02:48, "Ilya Maximets"wrote: >On 19.04.2016 10:18, Ilya Maximets wrote: >> There was a reason for 2 calls for dp_netdev_pmd_reload_done() inside >> pmd_thread_main(). The reason is that we must wait until PMD thread >> completely done with reloading. This patch introduces race condition >> for pmd->exit_latch. While removing last port on numa node >> dp_netdev_reload_pmd__(pmd) will be called twice for each port. >> First call to remove port and second to destroy PMD thread. >> pmd->exit_latch setted between this two calls. This leads to probable >> situation when PMD thread will exit while processing first reloading. >> Main thread will wait forever on cond_wait in second reload in this >> case. Situation is easily reproducible by addition/deletion of last >> port (may be after few iterations in a cycle). >> >> Best regards, Ilya Maximets. > >This incremental should help: >-- >diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >index 588d56f..2235297 100644 >--- a/lib/dpif-netdev.c >+++ b/lib/dpif-netdev.c >@@ -2785,6 +2785,7 @@ pmd_thread_main(void *f_) > unsigned int port_seq = PMD_INITIAL_SEQ; > int poll_cnt; > int i; >+bool exiting; > > poll_cnt = 0; > poll_list = NULL; >@@ -2825,14 +2826,15 @@ reload: > } > } > >+emc_cache_uninit(>flow_cache); >+ > poll_cnt = pmd_load_queues_and_ports(pmd, _list); >+exiting = latch_is_set(>exit_latch); > /* Signal here to make sure the pmd finishes > * reloading the updated configuration. */ > dp_netdev_pmd_reload_done(pmd); > >-emc_cache_uninit(>flow_cache); >- >-if (!latch_is_set(>exit_latch)){ >+if (!exiting) { > goto reload; > } > >-- You're right, thanks for the detailed analysis and the suggested fix. I applied the suggested incremental, but kept emc_cache_uninit() where it is right now. 
I sent an updated version here: http://openvswitch.org/pipermail/dev/2016-April/069835.html Thanks, Daniele > > >> On 08.04.2016 06:13, Daniele Di Proietto wrote: >>> The pmds and the main threads are synchronized using a condition >>> variable. The main thread writes a new configuration, then it waits on >>> the condition variable. A pmd thread reads the new configuration, then >>> it calls signal() on the condition variable. To make sure that the pmds >>> and the main thread have a consistent view, each signal() should be >>> backed by a wait(). >>> >>> Currently the first signal() doesn't have a corresponding wait(). If >>> the pmd thread takes a long time to start and the signal() is received >>> by a later wait, the threads will have an inconsistent view. >>> >>> The commit fixes the problem by removing the first signal() from the >>> pmd thread. >>> >>> This is hardly a problem on current master, because the main thread >>> will call the first wait() a long time after the creation of a pmd >>> thread. It becomes a problem with the next commits. 
>>> >>> Signed-off-by: Daniele Di Proietto >>> --- >>> lib/dpif-netdev.c | 21 + >>> 1 file changed, 9 insertions(+), 12 deletions(-) >>> >>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>> index 9c32c64..2424d3e 100644 >>> --- a/lib/dpif-netdev.c >>> +++ b/lib/dpif-netdev.c >>> @@ -2652,21 +2652,22 @@ dpif_netdev_wait(struct dpif *dpif) >>> >>> static int >>> pmd_load_queues(struct dp_netdev_pmd_thread *pmd, struct rxq_poll >>>**ppoll_list) >>> -OVS_REQUIRES(pmd->poll_mutex) >>> { >>> struct rxq_poll *poll_list = *ppoll_list; >>> struct rxq_poll *poll; >>> int i; >>> >>> +ovs_mutex_lock(>poll_mutex); >>> poll_list = xrealloc(poll_list, pmd->poll_cnt * sizeof >>>*poll_list); >>> >>> i = 0; >>> LIST_FOR_EACH (poll, node, >poll_list) { >>> poll_list[i++] = *poll; >>> } >>> +ovs_mutex_unlock(>poll_mutex); >>> >>> *ppoll_list = poll_list; >>> -return pmd->poll_cnt; >>> +return i; >>> } >>> >>> static void * >>> @@ -2685,13 +2686,10 @@ pmd_thread_main(void *f_) >>> /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */ >>> ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); >>> pmd_thread_setaffinity_cpu(pmd->core_id); >>> +poll_cnt = pmd_load_queues(pmd, _list); >>> reload: >>> emc_cache_init(>flow_cache); >>> >>> -ovs_mutex_lock(>poll_mutex); >>> -poll_cnt = pmd_load_queues(pmd, _list); >>> -ovs_mutex_unlock(>poll_mutex); >>> - >>> /* List port/core affinity */ >>> for (i = 0; i < poll_cnt; i++) { >>> VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n", >>> @@ -2699,10 +2697,6 @@ reload: >>> netdev_rxq_get_queue_id(poll_list[i].rx)); >>> } >>> >>> -/* Signal here to make sure the pmd finishes >>> - * reloading the updated configuration. */ >>> -
Re: [ovs-dev] [PATCH v7 05/16] dpif-netdev: Fix race condition in pmd thread initialization.
On 19.04.2016 10:18, Ilya Maximets wrote: > There was a reason for 2 calls for dp_netdev_pmd_reload_done() inside > pmd_thread_main(). The reason is that we must wait until PMD thread > completely done with reloading. This patch introduces race condition > for pmd->exit_latch. While removing last port on numa node > dp_netdev_reload_pmd__(pmd) will be called twice for each port. > First call to remove port and second to destroy PMD thread. > pmd->exit_latch setted between this two calls. This leads to probable > situation when PMD thread will exit while processing first reloading. > Main thread will wait forever on cond_wait in second reload in this > case. Situation is easily reproducible by addition/deletion of last > port (may be after few iterations in a cycle). > > Best regards, Ilya Maximets. This incremental should help: -- diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 588d56f..2235297 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2785,6 +2785,7 @@ pmd_thread_main(void *f_) unsigned int port_seq = PMD_INITIAL_SEQ; int poll_cnt; int i; +bool exiting; poll_cnt = 0; poll_list = NULL; @@ -2825,14 +2826,15 @@ reload: } } +emc_cache_uninit(>flow_cache); + poll_cnt = pmd_load_queues_and_ports(pmd, _list); +exiting = latch_is_set(>exit_latch); /* Signal here to make sure the pmd finishes * reloading the updated configuration. */ dp_netdev_pmd_reload_done(pmd); -emc_cache_uninit(>flow_cache); - -if (!latch_is_set(>exit_latch)){ +if (!exiting) { goto reload; } -- > On 08.04.2016 06:13, Daniele Di Proietto wrote: >> The pmds and the main threads are synchronized using a condition >> variable. The main thread writes a new configuration, then it waits on >> the condition variable. A pmd thread reads the new configuration, then >> it calls signal() on the condition variable. To make sure that the pmds >> and the main thread have a consistent view, each signal() should be >> backed by a wait(). 
>> >> Currently the first signal() doesn't have a corresponding wait(). If >> the pmd thread takes a long time to start and the signal() is received >> by a later wait, the threads will have an inconsistent view. >> >> The commit fixes the problem by removing the first signal() from the >> pmd thread. >> >> This is hardly a problem on current master, because the main thread >> will call the first wait() a long time after the creation of a pmd >> thread. It becomes a problem with the next commits. >> >> Signed-off-by: Daniele Di Proietto>> --- >> lib/dpif-netdev.c | 21 + >> 1 file changed, 9 insertions(+), 12 deletions(-) >> >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index 9c32c64..2424d3e 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -2652,21 +2652,22 @@ dpif_netdev_wait(struct dpif *dpif) >> >> static int >> pmd_load_queues(struct dp_netdev_pmd_thread *pmd, struct rxq_poll >> **ppoll_list) >> -OVS_REQUIRES(pmd->poll_mutex) >> { >> struct rxq_poll *poll_list = *ppoll_list; >> struct rxq_poll *poll; >> int i; >> >> +ovs_mutex_lock(>poll_mutex); >> poll_list = xrealloc(poll_list, pmd->poll_cnt * sizeof *poll_list); >> >> i = 0; >> LIST_FOR_EACH (poll, node, >poll_list) { >> poll_list[i++] = *poll; >> } >> +ovs_mutex_unlock(>poll_mutex); >> >> *ppoll_list = poll_list; >> -return pmd->poll_cnt; >> +return i; >> } >> >> static void * >> @@ -2685,13 +2686,10 @@ pmd_thread_main(void *f_) >> /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. 
*/ >> ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); >> pmd_thread_setaffinity_cpu(pmd->core_id); >> +poll_cnt = pmd_load_queues(pmd, _list); >> reload: >> emc_cache_init(>flow_cache); >> >> -ovs_mutex_lock(>poll_mutex); >> -poll_cnt = pmd_load_queues(pmd, _list); >> -ovs_mutex_unlock(>poll_mutex); >> - >> /* List port/core affinity */ >> for (i = 0; i < poll_cnt; i++) { >> VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n", >> @@ -2699,10 +2697,6 @@ reload: >> netdev_rxq_get_queue_id(poll_list[i].rx)); >> } >> >> -/* Signal here to make sure the pmd finishes >> - * reloading the updated configuration. */ >> -dp_netdev_pmd_reload_done(pmd); >> - >> for (;;) { >> for (i = 0; i < poll_cnt; i++) { >> dp_netdev_process_rxq_port(pmd, poll_list[i].port, >> poll_list[i].rx); >> @@ -2725,14 +2719,17 @@ reload: >> } >> } >> >> +poll_cnt = pmd_load_queues(pmd, _list); >> +/* Signal here to make sure the pmd finishes >> + * reloading the updated configuration. */ >> +dp_netdev_pmd_reload_done(pmd); >> + >>
Re: [ovs-dev] [PATCH v7 05/16] dpif-netdev: Fix race condition in pmd thread initialization.
There was a reason for 2 calls for dp_netdev_pmd_reload_done() inside pmd_thread_main(). The reason is that we must wait until PMD thread completely done with reloading. This patch introduces race condition for pmd->exit_latch. While removing last port on numa node dp_netdev_reload_pmd__(pmd) will be called twice for each port. First call to remove port and second to destroy PMD thread. pmd->exit_latch setted between this two calls. This leads to probable situation when PMD thread will exit while processing first reloading. Main thread will wait forever on cond_wait in second reload in this case. Situation is easily reproducible by addition/deletion of last port (may be after few iterations in a cycle). Best regards, Ilya Maximets. On 08.04.2016 06:13, Daniele Di Proietto wrote: > The pmds and the main threads are synchronized using a condition > variable. The main thread writes a new configuration, then it waits on > the condition variable. A pmd thread reads the new configuration, then > it calls signal() on the condition variable. To make sure that the pmds > and the main thread have a consistent view, each signal() should be > backed by a wait(). > > Currently the first signal() doesn't have a corresponding wait(). If > the pmd thread takes a long time to start and the signal() is received > by a later wait, the threads will have an inconsistent view. > > The commit fixes the problem by removing the first signal() from the > pmd thread. > > This is hardly a problem on current master, because the main thread > will call the first wait() a long time after the creation of a pmd > thread. It becomes a problem with the next commits. 
> > Signed-off-by: Daniele Di Proietto> --- > lib/dpif-netdev.c | 21 + > 1 file changed, 9 insertions(+), 12 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 9c32c64..2424d3e 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -2652,21 +2652,22 @@ dpif_netdev_wait(struct dpif *dpif) > > static int > pmd_load_queues(struct dp_netdev_pmd_thread *pmd, struct rxq_poll > **ppoll_list) > -OVS_REQUIRES(pmd->poll_mutex) > { > struct rxq_poll *poll_list = *ppoll_list; > struct rxq_poll *poll; > int i; > > +ovs_mutex_lock(>poll_mutex); > poll_list = xrealloc(poll_list, pmd->poll_cnt * sizeof *poll_list); > > i = 0; > LIST_FOR_EACH (poll, node, >poll_list) { > poll_list[i++] = *poll; > } > +ovs_mutex_unlock(>poll_mutex); > > *ppoll_list = poll_list; > -return pmd->poll_cnt; > +return i; > } > > static void * > @@ -2685,13 +2686,10 @@ pmd_thread_main(void *f_) > /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */ > ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); > pmd_thread_setaffinity_cpu(pmd->core_id); > +poll_cnt = pmd_load_queues(pmd, _list); > reload: > emc_cache_init(>flow_cache); > > -ovs_mutex_lock(>poll_mutex); > -poll_cnt = pmd_load_queues(pmd, _list); > -ovs_mutex_unlock(>poll_mutex); > - > /* List port/core affinity */ > for (i = 0; i < poll_cnt; i++) { > VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n", > @@ -2699,10 +2697,6 @@ reload: > netdev_rxq_get_queue_id(poll_list[i].rx)); > } > > -/* Signal here to make sure the pmd finishes > - * reloading the updated configuration. */ > -dp_netdev_pmd_reload_done(pmd); > - > for (;;) { > for (i = 0; i < poll_cnt; i++) { > dp_netdev_process_rxq_port(pmd, poll_list[i].port, > poll_list[i].rx); > @@ -2725,14 +2719,17 @@ reload: > } > } > > +poll_cnt = pmd_load_queues(pmd, _list); > +/* Signal here to make sure the pmd finishes > + * reloading the updated configuration. 
*/ > +dp_netdev_pmd_reload_done(pmd); > + > emc_cache_uninit(&pmd->flow_cache); > > if (!latch_is_set(&pmd->exit_latch)) { > goto reload; > } > > -dp_netdev_pmd_reload_done(pmd); > - > free(poll_list); > return NULL; > } > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH v7 05/16] dpif-netdev: Fix race condition in pmd thread initialization.
The pmds and the main threads are synchronized using a condition variable. The main thread writes a new configuration, then it waits on the condition variable. A pmd thread reads the new configuration, then it calls signal() on the condition variable. To make sure that the pmds and the main thread have a consistent view, each signal() should be backed by a wait(). Currently the first signal() doesn't have a corresponding wait(). If the pmd thread takes a long time to start and the signal() is received by a later wait, the threads will have an inconsistent view. The commit fixes the problem by removing the first signal() from the pmd thread. This is hardly a problem on current master, because the main thread will call the first wait() a long time after the creation of a pmd thread. It becomes a problem with the next commits. Signed-off-by: Daniele Di Proietto --- lib/dpif-netdev.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 9c32c64..2424d3e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2652,21 +2652,22 @@ dpif_netdev_wait(struct dpif *dpif) static int pmd_load_queues(struct dp_netdev_pmd_thread *pmd, struct rxq_poll **ppoll_list) -OVS_REQUIRES(pmd->poll_mutex) { struct rxq_poll *poll_list = *ppoll_list; struct rxq_poll *poll; int i; +ovs_mutex_lock(&pmd->poll_mutex); poll_list = xrealloc(poll_list, pmd->poll_cnt * sizeof *poll_list); i = 0; LIST_FOR_EACH (poll, node, &pmd->poll_list) { poll_list[i++] = *poll; } +ovs_mutex_unlock(&pmd->poll_mutex); *ppoll_list = poll_list; -return pmd->poll_cnt; +return i; } static void * @@ -2685,13 +2686,10 @@ pmd_thread_main(void *f_) /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. 
*/ ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); pmd_thread_setaffinity_cpu(pmd->core_id); +poll_cnt = pmd_load_queues(pmd, &poll_list); reload: emc_cache_init(&pmd->flow_cache); -ovs_mutex_lock(&pmd->poll_mutex); -poll_cnt = pmd_load_queues(pmd, &poll_list); -ovs_mutex_unlock(&pmd->poll_mutex); - /* List port/core affinity */ for (i = 0; i < poll_cnt; i++) { VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n", @@ -2699,10 +2697,6 @@ reload: netdev_rxq_get_queue_id(poll_list[i].rx)); } -/* Signal here to make sure the pmd finishes - * reloading the updated configuration. */ -dp_netdev_pmd_reload_done(pmd); - for (;;) { for (i = 0; i < poll_cnt; i++) { dp_netdev_process_rxq_port(pmd, poll_list[i].port, poll_list[i].rx); @@ -2725,14 +2719,17 @@ reload: } } +poll_cnt = pmd_load_queues(pmd, &poll_list); +/* Signal here to make sure the pmd finishes + * reloading the updated configuration. */ +dp_netdev_pmd_reload_done(pmd); + emc_cache_uninit(&pmd->flow_cache); if (!latch_is_set(&pmd->exit_latch)) { goto reload; } -dp_netdev_pmd_reload_done(pmd); - free(poll_list); return NULL; } -- 2.1.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev