In the case where Open MPI is compiled with --enable-mpi-thread-multiple, a call
to opal_using_threads() always returns 0 in the btl_xxx_component_init()
routine of the BTLs, even if the application calls MPI_Init_thread() with
MPI_THREAD_MULTIPLE.
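
For reference, a minimal program that hits this path (just a sketch, nothing
beyond standard MPI calls):

#include <stdio.h>
#include <mpi.h>

int main(int argc, char **argv)
{
    int provided;

    /* Request full thread support.  "provided" comes back as
       MPI_THREAD_MULTIPLE, yet the BTLs were already initialized while
       opal_using_threads() still returned 0. */
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);

    printf("provided = %d (MPI_THREAD_MULTIPLE = %d)\n",
           provided, MPI_THREAD_MULTIPLE);

    MPI_Finalize();
    return 0;
}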

This is because opal_set_using_threads(true) in ompi/runtime/ompi_mpi_init.c is
called too late.
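
To illustrate why the ordering matters, here is a rough sketch of the kind of
check a component can make at init time (hypothetical code, not copied from any
actual BTL; opal_using_threads() is the real OPAL call, and the include path is
the one used in the tree this patch is against):

#include <stdbool.h>
#include "opal/threads/threads.h"   /* declares opal_using_threads() */

/* Hypothetical component init, for illustration only.  Because
 * opal_set_using_threads(true) currently runs after the BTLs are opened,
 * this test sees "false" even when the application asked for
 * MPI_THREAD_MULTIPLE, so the component skips its thread-safe setup. */
static int example_btl_component_init(void)
{
    bool want_locks = opal_using_threads();

    if (want_locks) {
        /* set up mutexes / thread-safe progress here */
    } else {
        /* single-threaded fast path -- the wrong choice for a
           THREAD_MULTIPLE run */
    }
    return 0;
}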

I propose the following patch, which solves the problem for me:

diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c
index 35509cf..c2370fc 100644
--- a/ompi/runtime/ompi_mpi_init.c
+++ b/ompi/runtime/ompi_mpi_init.c
@@ -512,6 +512,13 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     }
#endif

+    /* If thread support was enabled, then setup OPAL to allow for
+       them. */
+    if ((OPAL_ENABLE_PROGRESS_THREADS == 1) ||
+        (*provided != MPI_THREAD_SINGLE)) {
+        opal_set_using_threads(true);
+    }
+
     /* initialize datatypes. This step should be done early as it will
      * create the local convertor and local arch used in the proc
      * init.
@@ -724,13 +731,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
        goto error;
     }

-    /* If thread support was enabled, then setup OPAL to allow for
-       them. */
-    if ((OPAL_ENABLE_PROGRESS_THREADS == 1) ||
-        (*provided != MPI_THREAD_SINGLE)) {
-        opal_set_using_threads(true);
-    }
-
     /* start PML/BTL's */
     ret = MCA_PML_CALL(enable(true));
     if( OMPI_SUCCESS != ret ) {
