Previous diff was completely wrong. Sorry.

@@ -2971,6 +2970,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
numa_id)
      * pmd threads for the numa node. */
     if (!n_pmds) {
         int can_have, n_unpinned, i;
+        struct dp_netdev_pmd_thread **pmds;
 
         n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
         if (!n_unpinned) {
@@ -2982,15 +2982,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
numa_id)
         /* If cpu mask is specified, uses all unpinned cores, otherwise
          * tries creating NR_PMD_THREADS pmd threads. */
         can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, 
NR_PMD_THREADS);
+        pmds = xzalloc(can_have * sizeof(struct dp_netdev_pmd_thread*));
         for (i = 0; i < can_have; i++) {
-            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
             unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
-
-            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
+            pmds[i] = xzalloc(sizeof(struct dp_netdev_pmd_thread));
+            dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id);
+        }
+        /* The pmd thread code needs to see all the others configured pmd
+         * threads on the same numa node.  That's why we call
+         * 'dp_netdev_configure_pmd()' on all the threads and then we actually
+         * start them. */
+        for (i = 0; i < can_have; i++) {
             /* Each thread will distribute all devices rx-queues among
              * themselves. */
-            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
+            pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, 
pmds[i]);
         }
+        free(pmds);
         VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
     }
 }


On 27.07.2015 11:17, Ilya Maximets wrote:
> Sorry,
> without dp_netdev_reload_pmds(dp) at the end.
> 
> On 27.07.2015 10:58, Ilya Maximets wrote:
>> Yes, I think, this way is better. But it may be more simple
>> if we just keep all the dp_netdev_pmd_thread structures.
>> There is no need to search over all pmd threads on system.
>>
>> Like this:
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 79c4612..8e4c025 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -2961,6 +2960,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
>> numa_id)
>>       * pmd threads for the numa node. */
>>      if (!n_pmds) {
>>          int can_have, n_unpinned, i;
>> +        struct dp_netdev_pmd_thread *pmd;
>>  
>>          n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
>>          if (!n_unpinned) {
>> @@ -2972,16 +2972,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
>> numa_id)
>>          /* If cpu mask is specified, uses all unpinned cores, otherwise
>>           * tries creating NR_PMD_THREADS pmd threads. */
>>          can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, 
>> NR_PMD_THREADS);
>> +        pmd = xzalloc(can_have * sizeof(*pmd));
>>          for (i = 0; i < can_have; i++) {
>> -            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
>>              unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
>> -
>> -            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
>> +            dp_netdev_configure_pmd(&pmd[i], dp, i, core_id, numa_id);
>> +        }
>> +        /* The pmd thread code needs to see all the others configured pmd
>> +         * threads on the same numa node.  That's why we call
>> +         * 'dp_netdev_configure_pmd()' on all the threads and then we 
>> actually
>> +         * start them. */
>> +        for (i = 0; i < can_have; i++) {
>>              /* Each thread will distribute all devices rx-queues among
>>               * themselves. */
>> -            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>> +            pmd[i].thread = ovs_thread_create("pmd", pmd_thread_main, 
>> &pmd[i]);
>>          }
>>          VLOG_INFO("Created %d pmd threads on numa node %d", can_have, 
>> numa_id);
>> +        dp_netdev_reload_pmds(dp);
>>      }
>>  }
>>
>>
>> On 24.07.2015 19:42, Daniele Di Proietto wrote:
>>> That's a bad race condition, thanks for reporting it!
>>>
>>> Regarding the fix, I agree that reloading the threads would
>>> restore the correct mapping, but it would still allow
>>> the threads to run with the incorrect mapping for a brief
>>> interval.
>>>
>>> How about postponing the actual threads creation until all
>>> the pmds are configured?
>>>
>>> Something like:
>>>
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>>> index 79c4612..26d9f1f 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -2961,6 +2961,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int
>>> numa_id)
>>>       * pmd threads for the numa node. */
>>>      if (!n_pmds) {
>>>          int can_have, n_unpinned, i;
>>> +        struct dp_netdev_pmd_thread *pmd;
>>>
>>>          n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
>>>          if (!n_unpinned) {
>>> @@ -2973,13 +2974,22 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp,
>>> int numa_id)
>>>           * tries creating NR_PMD_THREADS pmd threads. */
>>>          can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned,
>>> NR_PMD_THREADS);
>>>          for (i = 0; i < can_have; i++) {
>>> -            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
>>>              unsigned core_id =
>>> ovs_numa_get_unpinned_core_on_numa(numa_id);
>>>
>>> +            pmd = xzalloc(sizeof *pmd);
>>>              dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
>>> -            /* Each thread will distribute all devices rx-queues among
>>> -             * themselves. */
>>> -            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>>> +        }
>>> +
>>> +        /* The pmd thread code needs to see all the others configured pmd
>>> +         * threads on the same numa node.  That's why we call
>>> +         * 'dp_netdev_configure_pmd()' on all the threads and then we
>>> actually
>>> +         * start them. */
>>> +        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>>> +            if (pmd->numa_id == numa_id) {
>>> +                /* Each thread will distribute all devices rx-queues among
>>> +                 * themselves. */
>>> +                pmd->thread = ovs_thread_create("pmd", pmd_thread_main,
>>> pmd);
>>> +            }
>>>          }
>>>          VLOG_INFO("Created %d pmd threads on numa node %d", can_have,
>>> numa_id);
>>>      }
>>>
>>>
>>> What do you think?
>>>
>>> Thanks!
>>>
>>> On 24/07/2015 12:18, "Ilya Maximets" <i.maxim...@samsung.com> wrote:
>>>
>>>> Currently pmd threads select queues in pmd_load_queues() according to
>>>> get_n_pmd_threads_on_numa(). This behavior leads to race between pmds,
>>>> beacause dp_netdev_set_pmds_on_numa() starts them one by one and
>>>> current number of threads changes incrementally.
>>>>
>>>> As a result we may have the following situation with 2 pmd threads:
>>>>
>>>> * dp_netdev_set_pmds_on_numa()
>>>> * pmd12 thread started. Currently only 1 pmd thread exists.
>>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_1'
>>>> dpif_netdev(pmd12)|INFO|Core 1 processing port 'port_2'
>>>> * pmd14 thread started. 2 pmd threads exists.
>>>> dpif_netdev|INFO|Created 2 pmd threads on numa node 0
>>>> dpif_netdev(pmd14)|INFO|Core 2 processing port 'port_2'
>>>>
>>>> We have:
>>>> core 1 --> port 1, port 2
>>>> core 2 --> port 2
>>>>
>>>> Fix this by reloading all pmds to get right port mapping.
>>>>
>>>> If we reload pmds, we'll have:
>>>> core 1 --> port 1
>>>> core 2 --> port 2
>>>>
>>>> Cc: Dyasly Sergey <s.dya...@samsung.com>
>>>> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
>>>> ---
>>>> lib/dpif-netdev.c | 6 +++---
>>>> 1 file changed, 3 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>>>> index 2958d52..fd700f9 100644
>>>> --- a/lib/dpif-netdev.c
>>>> +++ b/lib/dpif-netdev.c
>>>> @@ -1127,10 +1127,9 @@ do_add_port(struct dp_netdev *dp, const char
>>>> *devname, const char *type,
>>>>     ovs_refcount_init(&port->ref_cnt);
>>>>     cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
>>>>
>>>> -    if (netdev_is_pmd(netdev)) {
>>>> +    if (netdev_is_pmd(netdev))
>>>>         dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev));
>>>> -        dp_netdev_reload_pmds(dp);
>>>> -    }
>>>> +
>>>>     seq_change(dp->port_seq);
>>>>
>>>>     return 0;
>>>> @@ -2978,6 +2977,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp,
>>>> int numa_id)
>>>>             pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>>>>         }
>>>>         VLOG_INFO("Created %d pmd threads on numa node %d", can_have,
>>>> numa_id);
>>>> +        dp_netdev_reload_pmds(dp);
>>>>     }
>>>> }
>>>>
>>>> -- 
>>>> 2.1.4
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org
>>>> https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma
>>>> n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Sm
>>>> B5nZacmXNq0gKCC1s_Cw5yUNjxgD4v5kJqZ2uWLlE&m=YcYFmCgJtVWAOpEYl7OaM4nqi7maE7
>>>> XOiMnLOpnugHY&s=Zf_P-5VtI2yJNO-f1ZXolxD5syuhr7WdbyNvdwH3zQM&e= 
>>>
>>>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to