This patch also modifies the put path. Let me know if this works:
diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c
index 888e126..a3ec6f8 100644
--- a/ompi/mca/pml/ob1/pml_ob1_rdma.c
+++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c
@@ -42,6 +42,7 @@ size_t mca_pml_ob1_rdma_btls(
     mca_pml_ob1_com_btl_t* rdma_btls)
 {
     int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
+    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
     double weight_total = 0;
     int num_btls_used = 0;
@@ -57,6 +58,21 @@ size_t mca_pml_ob1_rdma_btls(
             (bml_endpoint->btl_rdma_index + n) % num_btls);
         mca_btl_base_registration_handle_t *reg_handle = NULL;
         mca_btl_base_module_t *btl = bml_btl->btl;
+        bool ignore = true;
+
+        /* do not use rdma btls that are not in the eager list. this is necessary to avoid using
+         * btls that exist on the endpoint only to support RMA. */
+        for (int i = 0 ; i < num_eager_btls ; ++i) {
+            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
+            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
+                ignore = false;
+                break;
+            }
+        }
+
+        if (ignore) {
+            continue;
+        }
         if (btl->btl_register_mem) {
             /* do not use the RDMA protocol with this btl if 1) leave pinned is disabled,
@@ -99,18 +115,34 @@ size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
                                        size_t size,
                                        mca_pml_ob1_com_btl_t* rdma_btls )
 {
-    int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
+    int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
+    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
     double weight_total = 0;
+    int rdma_count = 0;
-    for(i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
-        rdma_btls[i].bml_btl =
-            mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
-        rdma_btls[i].btl_reg = NULL;
+    for(int i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
+        mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
+        bool ignore = true;
+
+        for (int i = 0 ; i < num_eager_btls ; ++i) {
+            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
+            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
+                ignore = false;
+                break;
+            }
+        }
-        weight_total += rdma_btls[i].bml_btl->btl_weight;
+        if (ignore) {
+            continue;
+        }
+
+        rdma_btls[rdma_count].bml_btl = bml_btl;
+        rdma_btls[rdma_count++].btl_reg = NULL;
+
+        weight_total += bml_btl->btl_weight;
     }
-    mca_pml_ob1_calc_weighted_length(rdma_btls, i, size, weight_total);
+    mca_pml_ob1_calc_weighted_length (rdma_btls, rdma_count, size, weight_total);
-    return i;
+    return rdma_count;
 }
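
For reference, here is a minimal, self-contained sketch of the check both hunks add: an RDMA btl is kept only when its endpoint also appears in the eager list. The struct and names below are simplified stand-ins, not the real mca_bml_base_btl_t / bml_endpoint types, so treat it purely as an illustration of the idea:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for a bml btl entry; only the endpoint pointer
 * matters for the membership test (not the real mca_bml_base_btl_t). */
struct fake_btl {
    void *btl_endpoint;
};

/* true when 'candidate' shares an endpoint with some entry in the eager list */
static bool endpoint_in_eager_list(const struct fake_btl *candidate,
                                   const struct fake_btl *eager, size_t num_eager)
{
    for (size_t i = 0; i < num_eager; ++i) {
        if (eager[i].btl_endpoint == candidate->btl_endpoint) {
            return true;
        }
    }
    return false;
}

int main(void)
{
    int ep_sm, ep_openib;                                      /* dummy endpoint identities */
    struct fake_btl eager[] = { { &ep_sm } };                  /* eager list: sm only */
    struct fake_btl rdma[]  = { { &ep_sm }, { &ep_openib } };  /* rdma list: sm + openib */

    for (size_t i = 0; i < sizeof(rdma) / sizeof(rdma[0]); ++i) {
        printf("rdma btl %zu: %s\n", i,
               endpoint_in_eager_list(&rdma[i], eager,
                                      sizeof(eager) / sizeof(eager[0])) ? "kept" : "ignored");
    }
    return 0;
}

Built with any C99 compiler, this should print "kept" for the entry whose endpoint is in the eager list and "ignored" for the one that only exists to support RMA.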
> On Aug 7, 2016, at 6:51 PM, Nathan Hjelm <[email protected]> wrote:
>
> Looks like the put path probably needs a similar patch. Will send another
> patch soon.
>
>> On Aug 7, 2016, at 6:01 PM, [email protected] wrote:
>>
>> Hi,
>>
>> I applied the patch to the file "pml_ob1_rdma.c" and ran osu_bw again.
>> Then, I still see the bad performance for larger sizes (>= 2097152).
>>
>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -report-bindings osu_bw
>> [manage.cluster:27444] MCW rank 0 bound to socket 0[core 0[hwt 0]], socket 0[core 1[hwt 0]], socket 0[core 2[hwt 0]], socket 0[core 3[hwt 0]], socket 0[core 4[hwt 0]], socket 0[core 5[hwt 0]]: [B/B/B/B/B/B][./././././.]
>> [manage.cluster:27444] MCW rank 1 bound to socket 0[core 0[hwt 0]], socket 0[core 1[hwt 0]], socket 0[core 2[hwt 0]], socket 0[core 3[hwt 0]], socket 0[core 4[hwt 0]], socket 0[core 5[hwt 0]]: [B/B/B/B/B/B][./././././.]
>> # OSU MPI Bandwidth Test v3.1.1
>> # Size Bandwidth (MB/s)
>> 1 2.23
>> 2 4.52
>> 4 8.82
>> 8 17.83
>> 16 35.31
>> 32 69.49
>> 64 109.46
>> 128 178.51
>> 256 307.68
>> 512 532.64
>> 1024 909.34
>> 2048 1583.95
>> 4096 1554.74
>> 8192 2120.31
>> 16384 2489.79
>> 32768 2853.66
>> 65536 3692.82
>> 131072 4236.67
>> 262144 4575.63
>> 524288 4778.47
>> 1048576 4839.34
>> 2097152 2231.46
>> 4194304 1505.48
>>
>> Regards,
>>
>> Tetsuya Mishima
>>
>> On 2016/08/06 0:00:08, "devel" wrote in "Re: [OMPI devel] sm BTL performace of the openmpi-2.0.0":
>>> Making ob1 ignore RDMA btls that are not in use for eager messages might
>>> be sufficient. Please try the following patch and let me know if it works
>>> for you.
>>>
>>> diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c
>>> index 888e126..0c99525 100644
>>> --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c
>>> +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c
>>> @@ -42,6 +42,7 @@ size_t mca_pml_ob1_rdma_btls(
>>>      mca_pml_ob1_com_btl_t* rdma_btls)
>>>  {
>>>      int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
>>> +    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
>>>      double weight_total = 0;
>>>      int num_btls_used = 0;
>>>
>>> @@ -57,6 +58,21 @@ size_t mca_pml_ob1_rdma_btls(
>>>              (bml_endpoint->btl_rdma_index + n) % num_btls);
>>>          mca_btl_base_registration_handle_t *reg_handle = NULL;
>>>          mca_btl_base_module_t *btl = bml_btl->btl;
>>> +        bool ignore = true;
>>> +
>>> +        /* do not use rdma btls that are not in the eager list. this is necessary to avoid using
>>> +         * btls that exist on the endpoint only to support RMA. */
>>> +        for (int i = 0 ; i < num_eager_btls ; ++i) {
>>> +            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
>>> +            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
>>> +                ignore = false;
>>> +                break;
>>> +            }
>>> +        }
>>> +
>>> +        if (ignore) {
>>> +            continue;
>>> +        }
>>>
>>>          if (btl->btl_register_mem) {
>>>              /* do not use the RDMA protocol with this btl if 1) leave pinned is disabled,
>>>
>>>
>>>
>>> -Nathan
>>>
>>>
>>>> On Aug 5, 2016, at 8:44 AM, Nathan Hjelm <[email protected]> wrote:
>>>>
>>>> Nope. We are not going to change the flags as this will disable the btl
>>>> for one-sided. Not sure what is going on here as the openib btl should be
>>>> 1) not used for pt2pt, and 2) polled infrequently.
>>> The btl debug log suggests both of these are the case. Not sure what is
>>> going on yet.
>>>>
>>>> -Nathan
>>>>
>>>>> On Aug 5, 2016, at 8:16 AM, [email protected] wrote:
>>>>>
>>>>> Perhaps those flags need to be the default?
>>>>>
>>>>>
>>>>>> On Aug 5, 2016, at 7:14 AM, [email protected] wrote:
>>>>>>
>>>>>> Hi Christoph,
>>>>>>
>>>>>> I applied the commits - pull/#1250 as Nathan told me and added "-mca
>>>>>> btl_openib_flags 311" to the mpirun command line option, then it worked
>>>>>> for me. I don't know the reason, but it looks like ATOMIC_FOP in the
>>>>>> btl_openib_flags degrades the sm/vader performance.
>>>>>>
>>>>>> Regards,
>>>>>> Tetsuya Mishima
>>>>>>
>>>>>>
>>>>>> On 2016/08/05 22:10:37, "devel" wrote in "Re: [OMPI devel] sm BTL performace of the openmpi-2.0.0":
>>>>>>> Hello,
>>>>>>>
>>>>>>> We see the same problem here on various machines with Open MPI 2.0.0.
>>>>>>> To us it seems that enabling the openib btl triggers bad performance for
>>>>>>> the sm AND vader btls!
>>>>>>> --mca btl_base_verbose 10 reports in both cases the correct use of sm and
>>>>>>> vader between MPI ranks - only performance differs?!
>>>>>>>
>>>>>>> One irritating thing I see in the log output is the following:
>>>>>>> openib BTL: rdmacm CPC unavailable for use on mlx4_0:1; skipped
>>>>>>> [rank=1] openib: using port mlx4_0:1
>>>>>>> select: init of component openib returned success
>>>>>>>
>>>>>>> Did not look into the "Skipped" code part yet, ...
>>>>>>>
>>>>>>> Results see below.
>>>>>>>
>>>>>>> Best regards
>>>>>>> Christoph Niethammer
>>>>>>>
>>>>>>> --
>>>>>>>
>>>>>>> Christoph Niethammer
>>>>>>> High Performance Computing Center Stuttgart (HLRS)
>>>>>>> Nobelstrasse 19
>>>>>>> 70569 Stuttgart
>>>>>>>
>>>>>>> Tel: ++49(0)711-685-87203
>>>>>>> email: [email protected]
>>>>>>> http://www.hlrs.de/people/niethammer
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> mpirun -np 2 --mca btl self,vader osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size Bandwidth (MB/s)
>>>>>>> 1 4.83
>>>>>>> 2 10.30
>>>>>>> 4 24.68
>>>>>>> 8 49.27
>>>>>>> 16 95.80
>>>>>>> 32 187.52
>>>>>>> 64 270.82
>>>>>>> 128 405.00
>>>>>>> 256 659.26
>>>>>>> 512 1165.14
>>>>>>> 1024 2372.83
>>>>>>> 2048 3592.85
>>>>>>> 4096 4283.51
>>>>>>> 8192 5523.55
>>>>>>> 16384 7388.92
>>>>>>> 32768 7024.37
>>>>>>> 65536 7353.79
>>>>>>> 131072 7465.96
>>>>>>> 262144 8597.56
>>>>>>> 524288 9292.86
>>>>>>> 1048576 9168.01
>>>>>>> 2097152 9009.62
>>>>>>> 4194304 9013.02
>>>>>>>
>>>>>>> mpirun -np 2 --mca btl self,vader,openib osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size Bandwidth (MB/s)
>>>>>>> 1 5.32
>>>>>>> 2 11.14
>>>>>>> 4 20.88
>>>>>>> 8 49.26
>>>>>>> 16 99.11
>>>>>>> 32 197.42
>>>>>>> 64 301.08
>>>>>>> 128 413.64
>>>>>>> 256 651.15
>>>>>>> 512 1161.12
>>>>>>> 1024 2460.99
>>>>>>> 2048 3627.36
>>>>>>> 4096 2191.06
>>>>>>> 8192 3118.36
>>>>>>> 16384 3428.45
>>>>>>> 32768 3676.96
>>>>>>> 65536 3709.65
>>>>>>> 131072 3748.64
>>>>>>> 262144 3764.88
>>>>>>> 524288 3764.61
>>>>>>> 1048576 3772.45
>>>>>>> 2097152 3757.37
>>>>>>> 4194304 3746.45
>>>>>>>
>>>>>>> mpirun -np 2 --mca btl self,sm osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size Bandwidth (MB/s)
>>>>>>> 1 2.98
>>>>>>> 2 5.97
>>>>>>> 4 11.99
>>>>>>> 8 23.47
>>>>>>> 16 50.64
>>>>>>> 32 99.91
>>>>>>> 64 197.87
>>>>>>> 128 343.32
>>>>>>> 256 667.48
>>>>>>> 512 1200.86
>>>>>>> 1024 2050.05
>>>>>>> 2048 3578.52
>>>>>>> 4096 3966.92
>>>>>>> 8192 5687.96
>>>>>>> 16384 7395.88
>>>>>>> 32768 7101.41
>>>>>>> 65536 7619.49
>>>>>>> 131072 7978.09
>>>>>>> 262144 8648.87
>>>>>>> 524288 9129.18
>>>>>>> 1048576 10525.31
>>>>>>> 2097152 10511.63
>>>>>>> 4194304 10489.66
>>>>>>>
>>>>>>> mpirun -np 2 --mca btl self,sm,openib osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size Bandwidth (MB/s)
>>>>>>> 1 2.02
>>>>>>> 2 3.00
>>>>>>> 4 9.99
>>>>>>> 8 19.96
>>>>>>> 16 40.10
>>>>>>> 32 70.63
>>>>>>> 64 144.08
>>>>>>> 128 282.21
>>>>>>> 256 543.55
>>>>>>> 512 1032.61
>>>>>>> 1024 1871.09
>>>>>>> 2048 3294.07
>>>>>>> 4096 2336.48
>>>>>>> 8192 3142.22
>>>>>>> 16384 3419.93
>>>>>>> 32768 3647.30
>>>>>>> 65536 3725.40
>>>>>>> 131072 3749.43
>>>>>>> 262144 3765.31
>>>>>>> 524288 3771.06
>>>>>>> 1048576 3772.54
>>>>>>> 2097152 3760.93
>>>>>>> 4194304 3745.37
>>>>>>>
>>>>>>> ----- Original Message -----
>>>>>>> From: [email protected]
>>>>>>> To: "Open MPI Developers" <[email protected]>
>>>>>>> Sent: Wednesday, July 27, 2016 6:04:48 AM
>>>>>>> Subject: Re: [OMPI devel] sm BTL performace of the openmpi-2.0.0
>>>>>>>
>>>>>>> HiNathan,
>>>>>>>
>>>>>>> I applied those commits and ran again without any BTL specified.
>>>>>>>
>>>>>>> Then, although it says "mca: bml: Using vader btl for send to
>>>>>>> [[18993,1],1] on node manage", the osu_bw still shows it's very slow as
>>>>>>> shown below:
>>>>>>>
>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca btl_base_verbose 10 -bind-to core -report-bindings osu_bw
>>>>>>> [manage.cluster:17482] MCW rank 0 bound to socket 0[core 0[hwt 0]]:
>>>>>>> [B/././././.][./././././.]
>>>>>>> [manage.cluster:17482] MCW rank 1 bound to socket 0[core 1[hwt 0]]:
>>>>>>> [./B/./././.][./././././.]
>>>>>>> [manage.cluster:17487] mca: base: components_register: registering
>>>>>>> framework btl components
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component self
>>>>>>> [manage.cluster:17487] mca: base: components_register: component
>> self
>>>>>>> register function successful
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component vader
>>>>>>> [manage.cluster:17488] mca: base: components_register: registering
>>>>>>> framework btl components
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component self
>>>>>>> [manage.cluster:17487] mca: base: components_register: component
>> vader
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: component
>> self
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component vader
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component tcp
>>>>>>> [manage.cluster:17488] mca: base: components_register: component
>> vader
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component tcp
>>>>>>> [manage.cluster:17487] mca: base: components_register: component tcp
>>>>>>> register function successful
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component sm
>>>>>>> [manage.cluster:17488] mca: base: components_register: component tcp
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component sm
>>>>>>> [manage.cluster:17487] mca: base: components_register: component sm
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: component sm
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component openib
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component openib
>>>>>>> [manage.cluster:17488] mca: base: components_register: component
>> openib
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: opening btl
>> components
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> self
>>>>>>> [manage.cluster:17488] mca: base: components_open: component self
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> vader
>>>>>>> [manage.cluster:17488] mca: base: components_open: component vader
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> tcp
>>>>>>> [manage.cluster:17488] mca: base: components_open: component tcp
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> sm
>>>>>>> [manage.cluster:17488] mca: base: components_open: component sm open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> openib
>>>>>>> [manage.cluster:17488] mca: base: components_open: component openib
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] select: initializing btl component self
>>>>>>> [manage.cluster:17488] select: init of component self returned
>> success
>>>>>>> [manage.cluster:17488] select: initializing btl component vader
>>>>>>> [manage.cluster:17487] mca: base: components_register: component
>> openib
>>>>>>> register function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: opening btl
>> components
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> self
>>>>>>> [manage.cluster:17487] mca: base: components_open: component self
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> vader
>>>>>>> [manage.cluster:17487] mca: base: components_open: component vader
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> tcp
>>>>>>> [manage.cluster:17487] mca: base: components_open: component tcp
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> sm
>>>>>>> [manage.cluster:17487] mca: base: components_open: component sm open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> openib
>>>>>>> [manage.cluster:17488] select: init of component vader returned
>> success
>>>>>>> [manage.cluster:17488] select: initializing btl component tcp
>>>>>>> [manage.cluster:17487] mca: base: components_open: component openib
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] select: initializing btl component self
>>>>>>> [manage.cluster:17487] select: init of component self returned
>> success
>>>>>>> [manage.cluster:17487] select: initializing btl component vader
>>>>>>> [manage.cluster:17488] select: init of component tcp returned
>> success
>>>>>>> [manage.cluster:17488] select: initializing btl component sm
>>>>>>> [manage.cluster:17488] select: init of component sm returned success
>>>>>>> [manage.cluster:17488] select: initializing btl component openib
>>>>>>> [manage.cluster:17487] select: init of component vader returned
>> success
>>>>>>> [manage.cluster:17487] select: initializing btl component tcp
>>>>>>> [manage.cluster:17487] select: init of component tcp returned
>> success
>>>>>>> [manage.cluster:17487] select: initializing btl component sm
>>>>>>> [manage.cluster:17488] Checking distance from this process to device=mthca0
>>>>>>> [manage.cluster:17488] hwloc_distances->nbobjs=2
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[0]=1.000000
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[1]=1.600000
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[2]=1.600000
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[3]=1.000000
>>>>>>> [manage.cluster:17488] ibv_obj->type set to NULL
>>>>>>> [manage.cluster:17488] Process is bound: distance to device is 0.000000
>>>>>>> [manage.cluster:17487] select: init of component sm returned success
>>>>>>> [manage.cluster:17487] select: initializing btl component openib
>>>>>>> [manage.cluster:17488] openib BTL: rdmacm CPC unavailable for use on mthca0:1; skipped
>>>>>>> [manage.cluster:17487] Checking distance from this process to device=mthca0
>>>>>>> [manage.cluster:17487] hwloc_distances->nbobjs=2
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[0]=1.000000
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[1]=1.600000
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[2]=1.600000
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[3]=1.000000
>>>>>>> [manage.cluster:17487] ibv_obj->type set to NULL
>>>>>>> [manage.cluster:17487] Process is bound: distance to device is 0.000000
>>>>>>> [manage.cluster:17488] [rank=1] openib: using port mthca0:1
>>>>>>> [manage.cluster:17488] select: init of component openib returned success
>>>>>>> [manage.cluster:17487] openib BTL: rdmacm CPC unavailable for use on mthca0:1; skipped
>>>>>>> [manage.cluster:17487] [rank=0] openib: using port mthca0:1
>>>>>>> [manage.cluster:17487] select: init of component openib returned success
>>>>>>> [manage.cluster:17488] mca: bml: Using self btl for send to [[18993,1],1] on node manage
>>>>>>> [manage.cluster:17487] mca: bml: Using self btl for send to [[18993,1],0] on node manage
>>>>>>> [manage.cluster:17488] mca: bml: Using vader btl for send to [[18993,1],0] on node manage
>>>>>>> [manage.cluster:17487] mca: bml: Using vader btl for send to [[18993,1],1] on node manage
>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>> # Size Bandwidth (MB/s)
>>>>>>> 1 1.76
>>>>>>> 2 3.53
>>>>>>> 4 7.06
>>>>>>> 8 14.46
>>>>>>> 16 29.12
>>>>>>> 32 57.54
>>>>>>> 64 100.12
>>>>>>> 128 157.78
>>>>>>> 256 277.32
>>>>>>> 512 477.53
>>>>>>> 1024 894.81
>>>>>>> 2048 1330.68
>>>>>>> 4096 278.58
>>>>>>> 8192 516.00
>>>>>>> 16384 762.99
>>>>>>> 32768 1037.19
>>>>>>> 65536 1181.66
>>>>>>> 131072 1261.91
>>>>>>> 262144 1237.39
>>>>>>> 524288 1247.86
>>>>>>> 1048576 1252.04
>>>>>>> 2097152 1273.46
>>>>>>> 4194304 1281.21
>>>>>>> [manage.cluster:17488] mca: base: close: component self closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component self
>>>>>>> [manage.cluster:17487] mca: base: close: component self closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component self
>>>>>>> [manage.cluster:17488] mca: base: close: component vader closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component vader
>>>>>>> [manage.cluster:17487] mca: base: close: component vader closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component vader
>>>>>>> [manage.cluster:17488] mca: base: close: component tcp closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component tcp
>>>>>>> [manage.cluster:17487] mca: base: close: component tcp closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component tcp
>>>>>>> [manage.cluster:17488] mca: base: close: component sm closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component sm
>>>>>>> [manage.cluster:17487] mca: base: close: component sm closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component sm
>>>>>>> [manage.cluster:17488] mca: base: close: component openib closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component openib
>>>>>>> [manage.cluster:17487] mca: base: close: component openib closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component openib
>>>>>>>
>>>>>>> Tetsuya Mishima
>>>>>>>
>>>>>>> On 2016/07/27 9:20:28, "devel" wrote in "Re: [OMPI devel] sm BTL performace of the openmpi-2.0.0":
>>>>>>>> sm is deprecated in 2.0.0 and will likely be removed in favor of vader in 2.1.0.
>>>>>>>>
>>>>>>>> This issue is probably this known issue:
>>>>>>>> https://github.com/open-mpi/ompi-release/pull/1250
>>>>>>>>
>>>>>>>> Please apply those commits and see if it fixes the issue for you.
>>>>>>>>
>>>>>>>> -Nathan
>>>>>>>>
>>>>>>>>> On Jul 26, 2016, at 6:17 PM, [email protected] wrote:
>>>>>>>>>
>>>>>>>>> Hi Gilles,
>>>>>>>>>
>>>>>>>>> Thanks. I ran again with --mca pml ob1 but I've got the same results as below:
>>>>>>>>>
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1 -bind-to core -report-bindings osu_bw
>>>>>>>>> [manage.cluster:18142] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18142] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>> 1 1.48
>>>>>>>>> 2 3.07
>>>>>>>>> 4 6.26
>>>>>>>>> 8 12.53
>>>>>>>>> 16 24.33
>>>>>>>>> 32 49.03
>>>>>>>>> 64 83.46
>>>>>>>>> 128 132.60
>>>>>>>>> 256 234.96
>>>>>>>>> 512 420.86
>>>>>>>>> 1024 842.37
>>>>>>>>> 2048 1231.65
>>>>>>>>> 4096 264.67
>>>>>>>>> 8192 472.16
>>>>>>>>> 16384 740.42
>>>>>>>>> 32768 1030.39
>>>>>>>>> 65536 1191.16
>>>>>>>>> 131072 1269.45
>>>>>>>>> 262144 1238.33
>>>>>>>>> 524288 1247.97
>>>>>>>>> 1048576 1257.96
>>>>>>>>> 2097152 1274.74
>>>>>>>>> 4194304 1280.94
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1 -mca btl self,sm -bind-to core -report-bindings osu_bw
>>>>>>>>> [manage.cluster:18204] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18204] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>> 1 0.52
>>>>>>>>> 2 1.05
>>>>>>>>> 4 2.08
>>>>>>>>> 8 4.18
>>>>>>>>> 16 8.21
>>>>>>>>> 32 16.65
>>>>>>>>> 64 32.60
>>>>>>>>> 128 66.70
>>>>>>>>> 256 132.45
>>>>>>>>> 512 269.27
>>>>>>>>> 1024 504.63
>>>>>>>>> 2048 819.76
>>>>>>>>> 4096 874.54
>>>>>>>>> 8192 1447.11
>>>>>>>>> 16384 2263.28
>>>>>>>>> 32768 3236.85
>>>>>>>>> 65536 3567.34
>>>>>>>>> 131072 3555.17
>>>>>>>>> 262144 3455.76
>>>>>>>>> 524288 3441.80
>>>>>>>>> 1048576 3505.30
>>>>>>>>> 2097152 3534.01
>>>>>>>>> 4194304 3546.94
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1 -mca btl self,sm,openib -bind-to core -report-bindings osu_bw
>>>>>>>>> [manage.cluster:18218] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18218] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>> 1 0.51
>>>>>>>>> 2 1.03
>>>>>>>>> 4 2.05
>>>>>>>>> 8 4.07
>>>>>>>>> 16 8.14
>>>>>>>>> 32 16.32
>>>>>>>>> 64 32.98
>>>>>>>>> 128 63.70
>>>>>>>>> 256 126.66
>>>>>>>>> 512 252.61
>>>>>>>>> 1024 480.22
>>>>>>>>> 2048 810.54
>>>>>>>>> 4096 290.61
>>>>>>>>> 8192 512.49
>>>>>>>>> 16384 764.60
>>>>>>>>> 32768 1036.81
>>>>>>>>> 65536 1182.81
>>>>>>>>> 131072 1264.48
>>>>>>>>> 262144 1235.82
>>>>>>>>> 524288 1246.70
>>>>>>>>> 1048576 1254.66
>>>>>>>>> 2097152 1274.64
>>>>>>>>> 4194304 1280.65
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1 -mca btl self,openib -bind-to core -report-bindings osu_bw
>>>>>>>>> [manage.cluster:18276] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18276] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>> 1 0.54
>>>>>>>>> 2 1.08
>>>>>>>>> 4 2.18
>>>>>>>>> 8 4.33
>>>>>>>>> 16 8.69
>>>>>>>>> 32 17.39
>>>>>>>>> 64 34.34
>>>>>>>>> 128 66.28
>>>>>>>>> 256 130.36
>>>>>>>>> 512 241.81
>>>>>>>>> 1024 429.86
>>>>>>>>> 2048 553.44
>>>>>>>>> 4096 707.14
>>>>>>>>> 8192 879.60
>>>>>>>>> 16384 763.02
>>>>>>>>> 32768 1042.89
>>>>>>>>> 65536 1185.45
>>>>>>>>> 131072 1267.56
>>>>>>>>> 262144 1227.41
>>>>>>>>> 524288 1244.61
>>>>>>>>> 1048576 1255.66
>>>>>>>>> 2097152 1273.55
>>>>>>>>> 4194304 1281.05
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 2016/07/27 9:02:49, "devel" wrote in "Re: [OMPI devel] sm BTL performace of the openmpi-2.0.0":
>>>>>>>>>> Hi,
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> can you please run again with
>>>>>>>>>>
>>>>>>>>>> --mca pml ob1
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> if Open MPI was built with mxm support, pml/cm and mtl/mxm are used
>>>>>>>>>> instead of pml/ob1 and btl/openib
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Cheers,
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Gilles
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> On 7/27/2016 8:56 AM, [email protected] wrote:
>>>>>>>>>>> Hi folks,
>>>>>>>>>>>
>>>>>>>>>>> I saw a performance degradation of openmpi-2.0.0 when I ran our
>>>>>>>>>>> application on a node (12 cores). So I did 4 tests using osu_bw as below:
>>>>>>>>>>>
>>>>>>>>>>> 1: mpirun -np 2 osu_bw                          bad (30% of test2)
>>>>>>>>>>> 2: mpirun -np 2 -mca btl self,sm osu_bw         good (same as openmpi1.10.3)
>>>>>>>>>>> 3: mpirun -np 2 -mca btl self,sm,openib osu_bw  bad (30% of test2)
>>>>>>>>>>> 4: mpirun -np 2 -mca btl self,openib osu_bw     bad (30% of test2)
>>>>>>>>>>>
>>>>>>>>>>> I guess the openib BTL was used in tests 1 and 3, because these
>>>>>>>>>>> results are almost the same as test 4. I believe that the sm BTL
>>>>>>>>>>> should be used even in tests 1 and 3, because its priority is higher
>>>>>>>>>>> than openib. Unfortunately, at the moment, I couldn't figure out the
>>>>>>>>>>> root cause, so could someone please take care of it.
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Tetsuya Mishima
>>>>>>>>>>>
>>>>>>>>>>> P.S. Here I attached these test results.
>>>>>>>>>>>
>>>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -bind-to core -report-bindings osu_bw
>>>>>>>>>>> [manage.cluster:13389] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>>>> [manage.cluster:13389] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>>>> 1 1.49
>>>>>>>>>>> 2 3.04
>>>>>>>>>>> 4 6.13
>>>>>>>>>>> 8 12.23
>>>>>>>>>>> 16 25.01
>>>>>>>>>>> 32 49.96
>>>>>>>>>>> 64 87.07
>>>>>>>>>>> 128 138.87
>>>>>>>>>>> 256 245.97
>>>>>>>>>>> 512 423.30
>>>>>>>>>>> 1024 865.85
>>>>>>>>>>> 2048 1279.63
>>>>>>>>>>> 4096 264.79
>>>>>>>>>>> 8192 473.92
>>>>>>>>>>> 16384 739.27
>>>>>>>>>>> 32768 1030.49
>>>>>>>>>>> 65536 1190.21
>>>>>>>>>>> 131072 1270.77
>>>>>>>>>>> 262144 1238.74
>>>>>>>>>>> 524288 1245.97
>>>>>>>>>>> 1048576 1260.09
>>>>>>>>>>> 2097152 1274.53
>>>>>>>>>>> 4194304 1285.07
>>>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca btl self,sm -bind-to core -report-bindings osu_bw
>>>>>>>>>>> [manage.cluster:13448] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>>>> [manage.cluster:13448] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>>>> 1 0.51
>>>>>>>>>>> 2 1.01
>>>>>>>>>>> 4 2.03
>>>>>>>>>>> 8 4.08
>>>>>>>>>>> 16 7.92
>>>>>>>>>>> 32 16.16
>>>>>>>>>>> 64 32.53
>>>>>>>>>>> 128 64.30
>>>>>>>>>>> 256 128.19
>>>>>>>>>>> 512 256.48
>>>>>>>>>>> 1024 468.62
>>>>>>>>>>> 2048 785.29
>>>>>>>>>>> 4096 854.78
>>>>>>>>>>> 8192 1404.51
>>>>>>>>>>> 16384 2249.20
>>>>>>>>>>> 32768 3136.40
>>>>>>>>>>> 65536 3495.84
>>>>>>>>>>> 131072 3436.69
>>>>>>>>>>> 262144 3392.11
>>>>>>>>>>> 524288 3400.07
>>>>>>>>>>> 1048576 3460.60
>>>>>>>>>>> 2097152 3488.09
>>>>>>>>>>> 4194304 3498.45
>>>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca btl self,sm,openib -bind-to core -report-bindings osu_bw
>>>>>>>>>>> [manage.cluster:13462] MCW rank 0 bound to socket 0[core 0[hwt 0]]: [B/././././.][./././././.]
>>>>>>>>>>> [manage.cluster:13462] MCW rank 1 bound to socket 0[core 1[hwt 0]]: [./B/./././.][./././././.]
>>>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>>>> # Size Bandwidth (MB/s)
>>>>>>>>>>> 1 0.54
>>>>>>>>>>> 2 1.09
>>>>>>>>>>> 4 2.18
>>>>>>>>>>> 8 4.37
>>>>>>>>>>> 16 8.75
>>>>>>>>>>> 32 17.37
_______________________________________________
devel mailing list
[email protected]
https://rfd.newmexicoconsortium.org/mailman/listinfo/devel