This patch also modifies the put path. Let me know if this works:

diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c b/ompi/mca/pml/ob1/pml_ob1_rdma.c
index 888e126..a3ec6f8 100644
--- a/ompi/mca/pml/ob1/pml_ob1_rdma.c
+++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c
@@ -42,6 +42,7 @@ size_t mca_pml_ob1_rdma_btls(
     mca_pml_ob1_com_btl_t* rdma_btls)
 {
     int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
+    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
     double weight_total = 0;
     int num_btls_used = 0;
 
@@ -57,6 +58,21 @@ size_t mca_pml_ob1_rdma_btls(
                     (bml_endpoint->btl_rdma_index + n) % num_btls);
         mca_btl_base_registration_handle_t *reg_handle = NULL;
         mca_btl_base_module_t *btl = bml_btl->btl;
+        bool ignore = true;
+
+        /* do not use rdma btls that are not in the eager list. this is necessary to avoid using
+         * btls that exist on the endpoint only to support RMA. */
+        for (int i = 0 ; i < num_eager_btls ; ++i) {
+            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
+            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
+                ignore = false;
+                break;
+            }
+        }
+
+        if (ignore) {
+            continue;
+        }
 
         if (btl->btl_register_mem) {
             /* do not use the RDMA protocol with this btl if 1) leave pinned is disabled,
@@ -99,18 +115,34 @@ size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
                                        size_t size,
                                        mca_pml_ob1_com_btl_t* rdma_btls )
 {
-    int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
+    int num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
+    int num_eager_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_eager);
     double weight_total = 0;
+    int rdma_count = 0;
 
-    for(i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
-        rdma_btls[i].bml_btl =
-            mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
-        rdma_btls[i].btl_reg = NULL;
+    for(int i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
+        mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
+        bool ignore = true;
+
+        for (int i = 0 ; i < num_eager_btls ; ++i) {
+            mca_bml_base_btl_t *eager_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
+            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
+                ignore = false;
+                break;
+            }
+        }
 
-        weight_total += rdma_btls[i].bml_btl->btl_weight;
+        if (ignore) {
+            continue;
+        }
+
+        rdma_btls[rdma_count].bml_btl = bml_btl;
+        rdma_btls[rdma_count++].btl_reg = NULL;
+
+        weight_total += bml_btl->btl_weight;
     }
 
-    mca_pml_ob1_calc_weighted_length(rdma_btls, i, size, weight_total);
+    mca_pml_ob1_calc_weighted_length (rdma_btls, rdma_count, size, weight_total);
 
-    return i;
+    return rdma_count;
 }




> On Aug 7, 2016, at 6:51 PM, Nathan Hjelm <hje...@me.com> wrote:
> 
> Looks like the put path probably needs a similar patch. Will send another 
> patch soon. 
> 
>> On Aug 7, 2016, at 6:01 PM, tmish...@jcity.maeda.co.jp wrote:
>> 
>> Hi,
>> 
>> I applied the patch to the file "pml_ob1_rdma.c" and ran osu_bw again.
>> Then, I still see the bad performance for larger size(>=2097152 ).
>> 
>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -report-bindings
>> osu_bw
>> [manage.cluster:27444] MCW rank 0 bound to socket 0[core 0[hwt 0]], socket
>> 0[core 1[hwt 0]], socket 0[core 2[hwt 0]], so
>> cket 0[core 3[hwt 0]], socket 0[core 4[hwt 0]], socket 0[core 5[hwt 0]]:
>> [B/B/B/B/B/B][./././././.]
>> [manage.cluster:27444] MCW rank 1 bound to socket 0[core 0[hwt 0]], socket
>> 0[core 1[hwt 0]], socket 0[core 2[hwt 0]], so
>> cket 0[core 3[hwt 0]], socket 0[core 4[hwt 0]], socket 0[core 5[hwt 0]]:
>> [B/B/B/B/B/B][./././././.]
>> # OSU MPI Bandwidth Test v3.1.1
>> # Size        Bandwidth (MB/s)
>> 1                         2.23
>> 2                         4.52
>> 4                         8.82
>> 8                        17.83
>> 16                       35.31
>> 32                       69.49
>> 64                      109.46
>> 128                     178.51
>> 256                     307.68
>> 512                     532.64
>> 1024                    909.34
>> 2048                   1583.95
>> 4096                   1554.74
>> 8192                   2120.31
>> 16384                  2489.79
>> 32768                  2853.66
>> 65536                  3692.82
>> 131072                 4236.67
>> 262144                 4575.63
>> 524288                 4778.47
>> 1048576                4839.34
>> 2097152                2231.46
>> 4194304                1505.48
>> 
>> Regards,
>> 
>> Tetsuya Mishima
>> 
>> 2016/08/06 0:00:08、"devel"さんは「Re: [OMPI devel] sm BTL performace of
>> the openmpi-2.0.0」で書きました
>>> Making ob1 ignore RDMA btls that are not in use for eager messages might
>> be sufficient. Please try the following patch and let me know if it works
>> for you.
>>> 
>>> diff --git a/ompi/mca/pml/ob1/pml_ob1_rdma.c
>> b/ompi/mca/pml/ob1/pml_ob1_rdma.c
>>> index 888e126..0c99525 100644
>>> --- a/ompi/mca/pml/ob1/pml_ob1_rdma.c
>>> +++ b/ompi/mca/pml/ob1/pml_ob1_rdma.c
>>> @@ -42,6 +42,7 @@ size_t mca_pml_ob1_rdma_btls(
>>> mca_pml_ob1_com_btl_t* rdma_btls)
>>> {
>>> int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
>>> +    int num_eager_btls = mca_bml_base_btl_array_get_size
>> (&bml_endpoint->btl_eager);
>>> double weight_total = 0;
>>> int num_btls_used = 0;
>>> 
>>> @@ -57,6 +58,21 @@ size_t mca_pml_ob1_rdma_btls(
>>> (bml_endpoint->btl_rdma_index + n) % num_btls);
>>> mca_btl_base_registration_handle_t *reg_handle = NULL;
>>> mca_btl_base_module_t *btl = bml_btl->btl;
>>> +        bool ignore = true;
>>> +
>>> +        /* do not use rdma btls that are not in the eager list. this is
>> necessary to avoid using
>>> +         * btls that exist on the endpoint only to support RMA. */
>>> +        for (int i = 0 ; i < num_eager_btls ; ++i) {
>>> +            mca_bml_base_btl_t *eager_btl =
>> mca_bml_base_btl_array_get_index (&bml_endpoint->btl_eager, i);
>>> +            if (eager_btl->btl_endpoint == bml_btl->btl_endpoint) {
>>> +                ignore = false;
>>> +                break;
>>> +            }
>>> +        }
>>> +
>>> +        if (ignore) {
>>> +            continue;
>>> +        }
>>> 
>>> if (btl->btl_register_mem) {
>>> /* do not use the RDMA protocol with this btl if 1) leave pinned is
>> disabled,
>>> 
>>> 
>>> 
>>> -Nathan
>>> 
>>> 
>>>> On Aug 5, 2016, at 8:44 AM, Nathan Hjelm <hje...@me.com> wrote:
>>>> 
>>>> Nope. We are not going to change the flags as this will disable the blt
>> for one-sided. Not sure what is going on here as the openib btl should be
>> 1) not used for pt2pt, and 2) polled infrequently.
>>> The btl debug log suggests both of these are the case. Not sure what is
>> going on yet.
>>>> 
>>>> -Nathan
>>>> 
>>>>> On Aug 5, 2016, at 8:16 AM, r...@open-mpi.org wrote:
>>>>> 
>>>>> Perhaps those flags need to be the default?
>>>>> 
>>>>> 
>>>>>> On Aug 5, 2016, at 7:14 AM, tmish...@jcity.maeda.co.jp wrote:
>>>>>> 
>>>>>> Hi Christoph,
>>>>>> 
>>>>>> I applied the commits - pull/#1250 as Nathan told me and added "-mca
>>>>>> btl_openib_flags 311" to the mpirun command line option, then it
>> worked for
>>>>>> me. I don't know the reason, but it looks ATOMIC_FOP in the
>>>>>> btl_openib_flags degrades the sm/vader perfomance.
>>>>>> 
>>>>>> Regards,
>>>>>> Tetsuya Mishima
>>>>>> 
>>>>>> 
>>>>>> 2016/08/05 22:10:37、"devel"さんは「Re: [OMPI devel] sm BTL
>> performace of
>>>>>> the openmpi-2.0.0」で書きました
>>>>>>> Hello,
>>>>>>> 
>>>>>>> We see the same problem here on various machines with Open MPI
>> 2.0.0.
>>>>>>> To us it seems that enabling the openib btl triggers bad performance
>> for
>>>>>> the sm AND vader btls!
>>>>>>> --mca btl_base_verbose 10 reports in both cases the correct use of
>> sm and
>>>>>> vader between MPI ranks - only performance differs?!
>>>>>>> 
>>>>>>> One irritating thing I see in the log output is the following:
>>>>>>> openib BTL: rdmacm CPC unavailable for use on mlx4_0:1; skipped
>>>>>>> [rank=1] openib: using port mlx4_0:1
>>>>>>> select: init of component openib returned success
>>>>>>> 
>>>>>>> Did not look into the "Skipped" code part yet, ...
>>>>>>> 
>>>>>>> Results see below.
>>>>>>> 
>>>>>>> Best regards
>>>>>>> Christoph Niethammer
>>>>>>> 
>>>>>>> --
>>>>>>> 
>>>>>>> Christoph Niethammer
>>>>>>> High Performance Computing Center Stuttgart (HLRS)
>>>>>>> Nobelstrasse 19
>>>>>>> 70569 Stuttgart
>>>>>>> 
>>>>>>> Tel: ++49(0)711-685-87203
>>>>>>> email: nietham...@hlrs.de
>>>>>>> http://www.hlrs.de/people/niethammer
>>>>>>> 
>>>>>>> 
>>>>>>> 
>>>>>>> mpirun -np 2 --mca btl self,vader  osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>> 1                         4.83
>>>>>>> 2                        10.30
>>>>>>> 4                        24.68
>>>>>>> 8                        49.27
>>>>>>> 16                       95.80
>>>>>>> 32                      187.52
>>>>>>> 64                      270.82
>>>>>>> 128                     405.00
>>>>>>> 256                     659.26
>>>>>>> 512                    1165.14
>>>>>>> 1024                   2372.83
>>>>>>> 2048                   3592.85
>>>>>>> 4096                   4283.51
>>>>>>> 8192                   5523.55
>>>>>>> 16384                  7388.92
>>>>>>> 32768                  7024.37
>>>>>>> 65536                  7353.79
>>>>>>> 131072                 7465.96
>>>>>>> 262144                 8597.56
>>>>>>> 524288                 9292.86
>>>>>>> 1048576                9168.01
>>>>>>> 2097152                9009.62
>>>>>>> 4194304                9013.02
>>>>>>> 
>>>>>>> mpirun -np 2 --mca btl self,vader,openib  osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>> 1                         5.32
>>>>>>> 2                        11.14
>>>>>>> 4                        20.88
>>>>>>> 8                        49.26
>>>>>>> 16                       99.11
>>>>>>> 32                      197.42
>>>>>>> 64                      301.08
>>>>>>> 128                     413.64
>>>>>>> 256                     651.15
>>>>>>> 512                    1161.12
>>>>>>> 1024                   2460.99
>>>>>>> 2048                   3627.36
>>>>>>> 4096                   2191.06
>>>>>>> 8192                   3118.36
>>>>>>> 16384                  3428.45
>>>>>>> 32768                  3676.96
>>>>>>> 65536                  3709.65
>>>>>>> 131072                 3748.64
>>>>>>> 262144                 3764.88
>>>>>>> 524288                 3764.61
>>>>>>> 1048576                3772.45
>>>>>>> 2097152                3757.37
>>>>>>> 4194304                3746.45
>>>>>>> 
>>>>>>> mpirun -np 2 --mca btl self,sm  osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>> 1                         2.98
>>>>>>> 2                         5.97
>>>>>>> 4                        11.99
>>>>>>> 8                        23.47
>>>>>>> 16                       50.64
>>>>>>> 32                       99.91
>>>>>>> 64                      197.87
>>>>>>> 128                     343.32
>>>>>>> 256                     667.48
>>>>>>> 512                    1200.86
>>>>>>> 1024                   2050.05
>>>>>>> 2048                   3578.52
>>>>>>> 4096                   3966.92
>>>>>>> 8192                   5687.96
>>>>>>> 16384                  7395.88
>>>>>>> 32768                  7101.41
>>>>>>> 65536                  7619.49
>>>>>>> 131072                 7978.09
>>>>>>> 262144                 8648.87
>>>>>>> 524288                 9129.18
>>>>>>> 1048576               10525.31
>>>>>>> 2097152               10511.63
>>>>>>> 4194304               10489.66
>>>>>>> 
>>>>>>> mpirun -np 2 --mca btl self,sm,openib  osu_bw
>>>>>>> # OSU MPI Bandwidth Test
>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>> 1                         2.02
>>>>>>> 2                         3.00
>>>>>>> 4                         9.99
>>>>>>> 8                        19.96
>>>>>>> 16                       40.10
>>>>>>> 32                       70.63
>>>>>>> 64                      144.08
>>>>>>> 128                     282.21
>>>>>>> 256                     543.55
>>>>>>> 512                    1032.61
>>>>>>> 1024                   1871.09
>>>>>>> 2048                   3294.07
>>>>>>> 4096                   2336.48
>>>>>>> 8192                   3142.22
>>>>>>> 16384                  3419.93
>>>>>>> 32768                  3647.30
>>>>>>> 65536                  3725.40
>>>>>>> 131072                 3749.43
>>>>>>> 262144                 3765.31
>>>>>>> 524288                 3771.06
>>>>>>> 1048576                3772.54
>>>>>>> 2097152                3760.93
>>>>>>> 4194304                3745.37
>>>>>>> 
>>>>>>> ----- Original Message -----
>>>>>>> From: tmish...@jcity.maeda.co.jp
>>>>>>> To: "Open MPI Developers" <de...@open-mpi.org>
>>>>>>> Sent: Wednesday, July 27, 2016 6:04:48 AM
>>>>>>> Subject: Re: [OMPI devel] sm BTL performace of the openmpi-2.0.0
>>>>>>> 
>>>>>>> HiNathan,
>>>>>>> 
>>>>>>> I applied those commits and ran again without any BTL specified.
>>>>>>> 
>>>>>>> Then, although it says "mca: bml: Using vader btl for send to
>>>>>> [[18993,1],1]
>>>>>>> on node manage",
>>>>>>> the osu_bw still shows it's very slow as shown below:
>>>>>>> 
>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca
>>>>>> btl_base_verbose
>>>>>>> 10 -bind-to core -report-bindings osu_bw
>>>>>>> [manage.cluster:17482] MCW rank 0 bound to socket 0[core 0[hwt 0]]:
>>>>>>> [B/././././.][./././././.]
>>>>>>> [manage.cluster:17482] MCW rank 1 bound to socket 0[core 1[hwt 0]]:
>>>>>>> [./B/./././.][./././././.]
>>>>>>> [manage.cluster:17487] mca: base: components_register: registering
>>>>>>> framework btl components
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component self
>>>>>>> [manage.cluster:17487] mca: base: components_register: component
>> self
>>>>>>> register function successful
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component vader
>>>>>>> [manage.cluster:17488] mca: base: components_register: registering
>>>>>>> framework btl components
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component self
>>>>>>> [manage.cluster:17487] mca: base: components_register: component
>> vader
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: component
>> self
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component vader
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component tcp
>>>>>>> [manage.cluster:17488] mca: base: components_register: component
>> vader
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component tcp
>>>>>>> [manage.cluster:17487] mca: base: components_register: component tcp
>>>>>>> register function successful
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component sm
>>>>>>> [manage.cluster:17488] mca: base: components_register: component tcp
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component sm
>>>>>>> [manage.cluster:17487] mca: base: components_register: component sm
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: component sm
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_register: found loaded
>>>>>>> component openib
>>>>>>> [manage.cluster:17487] mca: base: components_register: found loaded
>>>>>>> component openib
>>>>>>> [manage.cluster:17488] mca: base: components_register: component
>> openib
>>>>>>> register function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: opening btl
>> components
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> self
>>>>>>> [manage.cluster:17488] mca: base: components_open: component self
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> vader
>>>>>>> [manage.cluster:17488] mca: base: components_open: component vader
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> tcp
>>>>>>> [manage.cluster:17488] mca: base: components_open: component tcp
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> sm
>>>>>>> [manage.cluster:17488] mca: base: components_open: component sm open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] mca: base: components_open: found loaded
>> component
>>>>>>> openib
>>>>>>> [manage.cluster:17488] mca: base: components_open: component openib
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17488] select: initializing btl component self
>>>>>>> [manage.cluster:17488] select: init of component self returned
>> success
>>>>>>> [manage.cluster:17488] select: initializing btl component vader
>>>>>>> [manage.cluster:17487] mca: base: components_register: component
>> openib
>>>>>>> register function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: opening btl
>> components
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> self
>>>>>>> [manage.cluster:17487] mca: base: components_open: component self
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> vader
>>>>>>> [manage.cluster:17487] mca: base: components_open: component vader
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> tcp
>>>>>>> [manage.cluster:17487] mca: base: components_open: component tcp
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> sm
>>>>>>> [manage.cluster:17487] mca: base: components_open: component sm open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] mca: base: components_open: found loaded
>> component
>>>>>>> openib
>>>>>>> [manage.cluster:17488] select: init of component vader returned
>> success
>>>>>>> [manage.cluster:17488] select: initializing btl component tcp
>>>>>>> [manage.cluster:17487] mca: base: components_open: component openib
>> open
>>>>>>> function successful
>>>>>>> [manage.cluster:17487] select: initializing btl component self
>>>>>>> [manage.cluster:17487] select: init of component self returned
>> success
>>>>>>> [manage.cluster:17487] select: initializing btl component vader
>>>>>>> [manage.cluster:17488] select: init of component tcp returned
>> success
>>>>>>> [manage.cluster:17488] select: initializing btl component sm
>>>>>>> [manage.cluster:17488] select: init of component sm returned success
>>>>>>> [manage.cluster:17488] select: initializing btl component openib
>>>>>>> [manage.cluster:17487] select: init of component vader returned
>> success
>>>>>>> [manage.cluster:17487] select: initializing btl component tcp
>>>>>>> [manage.cluster:17487] select: init of component tcp returned
>> success
>>>>>>> [manage.cluster:17487] select: initializing btl component sm
>>>>>>> [manage.cluster:17488] Checking distance from this process to
>>>>>> device=mthca0
>>>>>>> [manage.cluster:17488] hwloc_distances->nbobjs=2
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[0]=1.000000
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[1]=1.600000
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[2]=1.600000
>>>>>>> [manage.cluster:17488] hwloc_distances->latency[3]=1.000000
>>>>>>> [manage.cluster:17488] ibv_obj->type set to NULL
>>>>>>> [manage.cluster:17488] Process is bound: distance to device is
>> 0.000000
>>>>>>> [manage.cluster:17487] select: init of component sm returned success
>>>>>>> [manage.cluster:17487] select: initializing btl component openib
>>>>>>> [manage.cluster:17488] openib BTL: rdmacm CPC unavailable for use on
>>>>>>> mthca0:1; skipped
>>>>>>> [manage.cluster:17487] Checking distance from this process to
>>>>>> device=mthca0
>>>>>>> [manage.cluster:17487] hwloc_distances->nbobjs=2
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[0]=1.000000
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[1]=1.600000
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[2]=1.600000
>>>>>>> [manage.cluster:17487] hwloc_distances->latency[3]=1.000000
>>>>>>> [manage.cluster:17487] ibv_obj->type set to NULL
>>>>>>> [manage.cluster:17487] Process is bound: distance to device is
>> 0.000000
>>>>>>> [manage.cluster:17488] [rank=1] openib: using port mthca0:1
>>>>>>> [manage.cluster:17488] select: init of component openib returned
>> success
>>>>>>> [manage.cluster:17487] openib BTL: rdmacm CPC unavailable for use on
>>>>>>> mthca0:1; skipped
>>>>>>> [manage.cluster:17487] [rank=0] openib: using port mthca0:1>>>>
>> [manage.cluster:17487] select: init of component openib returned success
>>>>>>> [manage.cluster:17488] mca: bml: Using self btl for send to
>> [[18993,1],1]
>>>>>>> on node manage
>>>>>>> [manage.cluster:17487] mca: bml: Using self btl for send to
>> [[18993,1],0]
>>>>>>> on node manage
>>>>>>> [manage.cluster:17488] mca: bml: Using vader btl for send to
>>>>>> [[18993,1],0]
>>>>>>> on node manage
>>>>>>> [manage.cluster:17487] mca: bml: Using vader btl for send to
>>>>>> [[18993,1],1]
>>>>>>> on node manage
>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>> 1                         1.76
>>>>>>> 2                         3.53
>>>>>>> 4                         7.06
>>>>>>> 8                        14.46
>>>>>>> 16                       29.12
>>>>>>> 32                       57.54
>>>>>>> 64                      100.12
>>>>>>> 128                     157.78
>>>>>>> 256                     277.32
>>>>>>> 512                     477.53
>>>>>>> 1024                    894.81
>>>>>>> 2048                   1330.68
>>>>>>> 4096                    278.58
>>>>>>> 8192                    516.00
>>>>>>> 16384                   762.99
>>>>>>> 32768                  1037.19
>>>>>>> 65536                  1181.66
>>>>>>> 131072                 1261.91
>>>>>>> 262144                 1237.39
>>>>>>> 524288                 1247.86
>>>>>>> 1048576                1252.04
>>>>>>> 2097152                1273.46
>>>>>>> 4194304                1281.21
>>>>>>> [manage.cluster:17488] mca: base: close: component self closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component self
>>>>>>> [manage.cluster:17487] mca: base: close: component self closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component self
>>>>>>> [manage.cluster:17488] mca: base: close: component vader closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component vader
>>>>>>> [manage.cluster:17487] mca: base: close: component vader closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component vader
>>>>>>> [manage.cluster:17488] mca: base: close: component tcp closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component tcp
>>>>>>> [manage.cluster:17487] mca: base: close: component tcp closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component tcp
>>>>>>> [manage.cluster:17488] mca: base: close: component sm closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component sm
>>>>>>> [manage.cluster:17487] mca: base: close: component sm closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component sm
>>>>>>> [manage.cluster:17488] mca: base: close: component openib closed
>>>>>>> [manage.cluster:17488] mca: base: close: unloading component openib
>>>>>>> [manage.cluster:17487] mca: base: close: component openib closed
>>>>>>> [manage.cluster:17487] mca: base: close: unloading component openib
>>>>>>> 
>>>>>>> Tetsuya Mishima
>>>>>>> 
>>>>>>> 2016/07/27 9:20:28、"devel"さんは「Re: [OMPI devel] sm BTL
>> performace of
>>>>>>> the openmpi-2.0.0」で書きました
>>>>>>>> sm is deprecated in 2.0.0 and will likely be removed in favor of
>> vader
>>>>>> in
>>>>>>> 2.1.0.
>>>>>>>> 
>>>>>>>> This issue is probably this known issue:
>>>>>>> https://github.com/open-mpi/ompi-release/pull/1250
>>>>>>>> 
>>>>>>>> Please apply those commits and see if it fixes the issue for you.
>>>>>>>> 
>>>>>>>> -Nathan
>>>>>>>> 
>>>>>>>>> On Jul 26, 2016, at 6:17 PM, tmish...@jcity.maeda.co.jp wrote:
>>>>>>>>> 
>>>>>>>>> Hi Gilles,
>>>>>>>>> 
>>>>>>>>> Thanks. I ran again with --mca pml ob1 but I've got the same
>> results
>>>>>> as
>>>>>>>>> below:
>>>>>>>>> 
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1
>>>>>>> -bind-to
>>>>>>>>> core -report-bindings osu_bw
>>>>>>>>> [manage.cluster:18142] MCW rank 0 bound to socket 0[core 0[hwt
>> 0]]:
>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18142] MCW rank 1 bound to socket 0[core 1[hwt
>> 0]]:
>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>> 1                         1.48
>>>>>>>>> 2                         3.07
>>>>>>>>> 4                         6.26
>>>>>>>>> 8                        12.53
>>>>>>>>> 16                       24.33
>>>>>>>>> 32                       49.03
>>>>>>>>> 64                       83.46
>>>>>>>>> 128                     132.60
>>>>>>>>> 256                     234.96
>>>>>>>>> 512                     420.86
>>>>>>>>> 1024                    842.37
>>>>>>>>> 2048                   1231.65
>>>>>>>>> 4096                    264.67
>>>>>>>>> 8192                    472.16
>>>>>>>>> 16384                   740.42
>>>>>>>>> 32768                  1030.39
>>>>>>>>> 65536                  1191.16
>>>>>>>>> 131072                 1269.45
>>>>>>>>> 262144                 1238.33> > 524288                 1247.97
>>>>>>>>> 1048576                1257.96
>>>>>>>>> 2097152                1274.74
>>>>>>>>> 4194304                1280.94
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1
>>>>>> -mca
>>>>>>> btl
>>>>>>>>> self,sm -bind-to core -report-bindings osu_b
>>>>>>>>> w
>>>>>>>>> [manage.cluster:18204] MCW rank 0 bound to socket 0[core 0[hwt
>> 0]]:
>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18204] MCW rank 1 bound to socket 0[core 1[hwt
>> 0]]:
>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>> 1                         0.52
>>>>>>>>> 2                         1.05
>>>>>>>>> 4                         2.08
>>>>>>>>> 8                         4.18
>>>>>>>>> 16                        8.21
>>>>>>>>> 32                       16.65
>>>>>>>>> 64                       32.60
>>>>>>>>> 128                      66.70
>>>>>>>>> 256                     132.45
>>>>>>>>> 512                     269.27
>>>>>>>>> 1024                    504.63
>>>>>>>>> 2048                    819.76
>>>>>>>>> 4096                    874.54
>>>>>>>>> 8192                   1447.11
>>>>>>>>> 16384                  2263.28
>>>>>>>>> 32768                  3236.85
>>>>>>>>> 65536                  3567.34
>>>>>>>>> 131072                 3555.17
>>>>>>>>> 262144                 3455.76
>>>>>>>>> 524288                 3441.80
>>>>>>>>> 1048576                3505.30
>>>>>>>>> 2097152                3534.01
>>>>>>>>> 4194304                3546.94
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1
>>>>>> -mca
>>>>>>> btl
>>>>>>>>> self,sm,openib -bind-to core -report-binding
>>>>>>>>> s osu_bw
>>>>>>>>> [manage.cluster:18218] MCW rank 0 bound to socket 0[core 0[hwt
>> 0]]:
>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18218] MCW rank 1 bound to socket 0[core 1[hwt
>> 0]]:
>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>> 1                         0.51
>>>>>>>>> 2                         1.03
>>>>>>>>> 4                         2.05
>>>>>>>>> 8                         4.07
>>>>>>>>> 16                        8.14
>>>>>>>>> 32                       16.32
>>>>>>>>> 64                       32.98
>>>>>>>>> 128                      63.70
>>>>>>>>> 256                     126.66
>>>>>>>>> 512                     252.61
>>>>>>>>> 1024                    480.22
>>>>>>>>> 2048                    810.54
>>>>>>>>> 4096                    290.61
>>>>>>>>> 8192                    512.49
>>>>>>>>> 16384                   764.60
>>>>>>>>> 32768                  1036.81
>>>>>>>>> 65536                  1182.81
>>>>>>>>> 131072                 1264.48
>>>>>>>>> 262144                 1235.82
>>>>>>>>> 524288                 1246.70
>>>>>>>>> 1048576                1254.66
>>>>>>>>> 2097152                1274.64
>>>>>>>>> 4194304                1280.65
>>>>>>>>> [mishima@manage OMB-3.1.1-openmpi2.0.0]$ mpirun -np 2 -mca pml ob1
>>>>>> -mca
>>>>>>> btl
>>>>>>>>> self,openib -bind-to core -report-bindings o
>>>>>>>>> su_bw
>>>>>>>>> [manage.cluster:18276] MCW rank 0 bound to socket 0[core 0[hwt
>> 0]]:
>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>> [manage.cluster:18276] MCW rank 1 bound to socket 0[core 1[hwt
>> 0]]:
>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>> 1                         0.54
>>>>>>>>> 2                         1.08
>>>>>>>>> 4                         2.18
>>>>>>>>> 8                         4.33
>>>>>>>>> 16                        8.69
>>>>>>>>> 32                       17.39
>>>>>>>>> 64                       34.34
>>>>>>>>> 128                      66.28
>>>>>>>>> 256                     130.36
>>>>>>>>> 512                     241.81
>>>>>>>>> 1024                    429.86
>>>>>>>>> 2048                    553.44
>>>>>>>>> 4096                    707.14
>>>>>>>>> 8192                    879.60
>>>>>>>>> 16384                   763.02
>>>>>>>>> 32768                  1042.89
>>>>>>>>> 65536                  1185.45
>>>>>>>>> 131072                 1267.56
>>>>>>>>> 262144                 1227.41
>>>>>>>>> 524288                 1244.61
>>>>>>>>> 1048576                1255.66
>>>>>>>>> 2097152                1273.55
>>>>>>>>> 4194304                1281.05
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 2016/07/27 9:02:49、"devel"さんは「Re: [OMPI devel] sm BTL
>> performace
>>>>>>> of
>>>>>>>>> the openmpi-2.0.0」で書きました
>>>>>>>>>> Hi,
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> can you please run again with
>>>>>>>>>> 
>>>>>>>>>> --mca pml ob1
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> if Open MPI was built with mxm support, pml/cm and mtl/mxm are
>> used
>>>>>>>>>> instead of pml/ob1 and btl/openib
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> Cheers,
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> Gilles
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> On 7/27/2016 8:56 AM, tmish...@jcity.maeda.co.jp wrote:
>>>>>>>>>>> Hi folks,
>>>>>>>>>>> 
>>>>>>>>>>> I saw a performance degradation of openmpi-2.0.0 when I ran our
>>>>>>>>> application
>>>>>>>>>>> on a node (12cores). So I did 4 tests using osu_bw as below:
>>>>>>>>>>> 
>>>>>>>>>>> 1: mpirun -np 2 osu_bw                bad(30% of test2)
>>>>>>>>>>> 2: mpirun -np 2 -mca btl self,sm osu_bw        good(same as
>>>>>>>>> openmpi1.10.3)
>>>>>>>>>>> 3: mpirun -np 2 -mca btl self,sm,openib osu_bw    bad(30% of
>>>>>> test2)
>>>>>>>>>>> 4: mpirun -np 2 -mca btl self,openib osu_bw    bad(30% of
>> test2)
>>>>>>>>>>> 
>>>>>>>>>>> I  guess openib BTL was used in the test 1 and 3, because these
>>>>>>> results
>>>>>>>>> are
>>>>>>>>>>> almost  same  as  test  4. I believe that sm BTL should be used
>>>>>> even
>>>>>>> in
>>>>>>>>> the
>>>>>>>>>>> test 1 and 3, because its priority is higher than openib.
>>>>>>>>> Unfortunately, at
>>>>>>>>>>> the  moment,  I couldn’t figure out the root cause. So please
>>>>>> someone
>>>>>>>>> would
>>>>>>>>>>> take care of it.
>>>>>>>>>>> 
>>>>>>>>>>> Regards,
>>>>>>>>>>> Tetsuya Mishima
>>>>>>>>>>> 
>>>>>>>>>>> P.S. Here I attached these test results.
>>>>>>>>>>> 
>>>>>>>>>>> [mishima@manage   OMB-3.1.1-openmpi2.0.0]$   mpirun  -np  2
>>>>>> -bind-to
>>>>>>>>> core
>>>>>>>>>>> -report-bindings osu_bw
>>>>>>>>>>> [manage.cluster:13389]  MCW  rank  0  bound  to  socket  0[core
>> 0
>>>>>>> [hwt
>>>>>>>>> 0]]:
>>>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>>>> [manage.cluster:13389]  MCW  rank  1  bound  to  socket  0[core
>> 1
>>>>>>> [hwt
>>>>>>>>> 0]]:
>>>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>>>> 1                         1.49
>>>>>>>>>>> 2                         3.04
>>>>>>>>>>> 4                         6.13
>>>>>>>>>>> 8                        12.23
>>>>>>>>>>> 16                       25.01
>>>>>>>>>>> 32                       49.96
>>>>>>>>>>> 64                       87.07
>>>>>>>>>>> 128                     138.87
>>>>>>>>>>> 256                     245.97
>>>>>>>>>>> 512                     423.30
>>>>>>>>>>> 1024                    865.85
>>>>>>>>>>> 2048                   1279.63
>>>>>>>>>>> 4096                    264.79
>>>>>>>>>>> 8192                    473.92
>>>>>>>>>>> 16384                   739.27
>>>>>>>>>>> 32768                  1030.49
>>>>>>>>>>> 65536                  1190.21
>>>>>>>>>>> 131072                 1270.77
>>>>>>>>>>> 262144                 1238.74
>>>>>>>>>>> 524288                 1245.97
>>>>>>>>>>> 1048576                1260.09
>>>>>>>>>>> 2097152                1274.53
>>>>>>>>>>> 4194304                1285.07
>>>>>>>>>>> [mishima@manage  OMB-3.1.1-openmpi2.0.0]$  mpirun  -np  2  -mca
>> btl
>>>>>>>>> self,sm
>>>>>>>>>>> -bind-to core -report-bindings osu_bw
>>>>>>>>>>> [manage.cluster:13448]  MCW  rank  0  bound  to  socket  0[core
>> 0
>>>>>>> [hwt
>>>>>>>>> 0]]:
>>>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>>>> [manage.cluster:13448]  MCW  rank  1  bound  to  socket  0[core
>> 1
>>>>>>> [hwt
>>>>>>>>> 0]]:
>>>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>>>> 1                         0.51
>>>>>>>>>>> 2                         1.01
>>>>>>>>>>> 4                         2.03
>>>>>>>>>>> 8                         4.08
>>>>>>>>>>> 16                        7.92
>>>>>>>>>>> 32                       16.16
>>>>>>>>>>> 64                       32.53
>>>>>>>>>>> 128                      64.30
>>>>>>>>>>> 256                     128.19
>>>>>>>>>>> 512                     256.48
>>>>>>>>>>> 1024                    468.62
>>>>>>>>>>> 2048                    785.29
>>>>>>>>>>> 4096                    854.78
>>>>>>>>>>> 8192                   1404.51
>>>>>>>>>>> 16384                  2249.20
>>>>>>>>>>> 32768                  3136.40
>>>>>>>>>>> 65536                  3495.84
>>>>>>>>>>> 131072                 3436.69
>>>>>>>>>>> 262144                 3392.11
>>>>>>>>>>> 524288                 3400.07
>>>>>>>>>>> 1048576                3460.60
>>>>>>>>>>> 2097152                3488.09
>>>>>>>>>>> 4194304                3498.45
>>>>>>>>>>> [mishima@manage    OMB-3.1.1-openmpi2.0.0]$   mpirun   -np   2
>>>>>> -mca
>>>>>>>>> btl
>>>>>>>>>>> self,sm,openib -bind-to core -report-bindings osu_bw
>>>>>>>>>>> [manage.cluster:13462]  MCW  rank  0  bound  to  socket  0[core
>> 0
>>>>>>> [hwt
>>>>>>>>> 0]]:
>>>>>>>>>>> [B/././././.][./././././.]
>>>>>>>>>>> [manage.cluster:13462]  MCW  rank  1  bound  to  socket  0[core
>> 1
>>>>>>> [hwt
>>>>>>>>> 0]]:
>>>>>>>>>>> [./B/./././.][./././././.]
>>>>>>>>>>> # OSU MPI Bandwidth Test v3.1.1
>>>>>>>>>>> # Size        Bandwidth (MB/s)
>>>>>>>>>>> 1                         0.54
>>>>>>>>>>> 2                         1.09
>>>>>>>>>>> 4                         2.18
>>>>>>>>>>> 8                         4.37
>>>>>>>>>>> 16                        8.75
>>>>>>>>>>> 32                       17.37
>> _______________________________________________
>> devel mailing list
>> devel@lists.open-mpi.org
>> https://rfd.newmexicoconsortium.org/mailman/listinfo/devel

_______________________________________________
devel mailing list
devel@lists.open-mpi.org
https://rfd.newmexicoconsortium.org/mailman/listinfo/devel

Reply via email to