Folks,

When I run
mpirun -np 1 ./intercomm_create
from the ibm test suite, it either:
- succeeds,
- hangs, or
- crashes: mpirun gets a SIGSEGV soon after writing the following message:
ORTE_ERROR_LOG: Not found in file ../../../src/ompi-trunk/orte/orted/pmix/pmix_server.c at line 566

Here is what happens:

First, the test program itself (a minimal sketch follows):
task 0 spawns task 1: the intercommunicator is ab_inter on task 0 and
parent on task 1.
Then task 0 spawns task 2: the intercommunicator is ac_inter on task 0
and parent on task 2.
Then come several operations (merge, barrier, ...), and then, without
any synchronization:
- task 0 calls MPI_Comm_disconnect(ab_inter) and then
MPI_Comm_disconnect(ac_inter)
- tasks 1 and 2 call MPI_Comm_disconnect(parent)
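
For reference, here is a minimal sketch of the failing pattern (my own
reconstruction, not the actual ibm test source; the merge/barrier phase
is elided):

#include <mpi.h>

/* sketch of the test: the parent spawns two children one at a time,
 * then everyone disconnects without any prior synchronization */
int main(int argc, char **argv)
{
    MPI_Comm parent, ab_inter, ac_inter;

    MPI_Init(&argc, &argv);
    MPI_Comm_get_parent(&parent);

    if (MPI_COMM_NULL == parent) {
        /* task 0: spawn task 1, then task 2 */
        MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 1, MPI_INFO_NULL, 0,
                       MPI_COMM_SELF, &ab_inter, MPI_ERRCODES_IGNORE);
        MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 1, MPI_INFO_NULL, 0,
                       MPI_COMM_SELF, &ac_inter, MPI_ERRCODES_IGNORE);
        /* ... merge, barrier, ... */
        MPI_Comm_disconnect(&ab_inter);  /* no synchronization before */
        MPI_Comm_disconnect(&ac_inter);  /* these two disconnects */
    } else {
        /* tasks 1 and 2 */
        /* ... merge, barrier, ... */
        MPI_Comm_disconnect(&parent);
    }

    MPI_Finalize();
    return 0;
}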

I applied the attached pmix_debug.patch and ran
mpirun -np 1 --mca pmix_base_verbose 90 ./intercomm_create

Basically, tasks 0 and 1 execute a native fence while, in parallel,
tasks 0 and 2 execute a native fence.
Both fences use the *same* tag on different but overlapping sets of tasks.
Bottom line: task 2 leaves the fence *before* task 0 has entered it
(it seems task 1 told task 2 it was OK to leave the fence).
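
To illustrate the ambiguity, here is a toy sketch (my own illustration,
not Open MPI code; the structures and names are assumptions) of what can
go wrong when fence replies are matched to posted recvs by tag alone, as
the "checking msg on tag 5 for tag 5" trace below suggests:

#include <stdio.h>

#define FENCE_TAG 5                  /* both fences post recvs on tag 5 */

struct pending_recv {
    int tag;
    const char *fence;               /* which fence posted this recv */
};

/* tag-only matching: return the first pending recv with a matching tag,
 * without checking which participant set the reply belongs to */
static const char *match(struct pending_recv *pending, int n, int tag)
{
    int i;
    for (i = 0; i < n; i++) {
        if (tag == pending[i].tag) {
            return pending[i].fence;
        }
    }
    return NULL;
}

int main(void)
{
    struct pending_recv pending[] = {
        { FENCE_TAG, "fence on tasks 0 and 1" },
        { FENCE_TAG, "fence on tasks 0 and 2" },
    };
    /* a release generated for the second fence arrives, but tag-only
     * matching credits it to the first pending recv: this is how a
     * contribution from task 1 can release task 2 early */
    printf("reply on tag %d matched to: %s\n",
           FENCE_TAG, match(pending, 2, FENCE_TAG));
    return 0;
}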

A simple workaround is to call MPI_Barrier before calling
MPI_Comm_disconnect.
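
For example, wrapping the two calls in a helper (disconnect_safely is a
hypothetical name, not part of the test):

#include <mpi.h>

/* workaround sketch: an explicit barrier on the intercommunicator right
 * before disconnecting it ensures all tasks reach the underlying pmix
 * fence together instead of racing into it */
static void disconnect_safely(MPI_Comm *comm)
{
    MPI_Barrier(*comm);
    MPI_Comm_disconnect(comm);
}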

At this stage, I doubt it is even possible to get this working at the
pmix level, so the fix might be to have MPI_Comm_disconnect invoke
MPI_Barrier. The attached comm_disconnect.patch always calls the barrier
before (indirectly) invoking pmix.

Could you please comment on this issue?

Cheers,

Gilles

Here are the relevant logs:

[soleil:00650] [[8110,3],0] pmix:native executing fence on 2 procs [[8110,1],0] and [[8110,3],0]
[soleil:00650] [[8110,3],0] [../../../../../../src/ompi-trunk/opal/mca/pmix/native/pmix_native.c:493] post send to server
[soleil:00650] [[8110,3],0] posting recv on tag 5
[soleil:00650] [[8110,3],0] usock:send_nb: already connected to server - queueing for send
[soleil:00650] [[8110,3],0] usock:send_handler called to send to server
[soleil:00650] [[8110,3],0] usock:send_handler SENDING TO SERVER
[soleil:00647] [[8110,2],0] pmix:native executing fence on 2 procs [[8110,1],0] and [[8110,2],0]
[soleil:00647] [[8110,2],0] [../../../../../../src/ompi-trunk/opal/mca/pmix/native/pmix_native.c:493] post send to server
[soleil:00647] [[8110,2],0] posting recv on tag 5
[soleil:00647] [[8110,2],0] usock:send_nb: already connected to server - queueing for send
[soleil:00647] [[8110,2],0] usock:send_handler called to send to server
[soleil:00647] [[8110,2],0] usock:send_handler SENDING TO SERVER
[soleil:00650] [[8110,3],0] usock:recv:handler called
[soleil:00650] [[8110,3],0] usock:recv:handler CONNECTED
[soleil:00650] [[8110,3],0] usock:recv:handler allocate new recv msg
[soleil:00650] usock:recv:handler read hdr
[soleil:00650] [[8110,3],0] usock:recv:handler allocate data region of size 14
[soleil:00650] [[8110,3],0] RECVD COMPLETE MESSAGE FROM SERVER OF 14 BYTES FOR TAG 5
[soleil:00650] [[8110,3],0] [../../../../../../src/ompi-trunk/opal/mca/pmix/native/usock_sendrecv.c:415] post msg
[soleil:00650] [[8110,3],0] message received 14 bytes for tag 5
[soleil:00650] [[8110,3],0] checking msg on tag 5 for tag 5
[soleil:00650] [[8110,3],0] pmix:native recv callback activated with 14 bytes
[soleil:00650] [[8110,3],0] pmix:native fence released on 2 procs [[8110,1],0] and [[8110,3],0]


Index: opal/mca/pmix/native/pmix_native.c
===================================================================
--- opal/mca/pmix/native/pmix_native.c  (revision 32594)
+++ opal/mca/pmix/native/pmix_native.c  (working copy)
@@ -390,9 +390,17 @@
     size_t i;
     uint32_t np;

-    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
+    if (2 == nprocs) {
+        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
+                        "%s pmix:native executing fence on %u procs %s and %s",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs,
+                        OPAL_NAME_PRINT(procs[0]),
+                        OPAL_NAME_PRINT(procs[1]));
+    } else {
+        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                         "%s pmix:native executing fence on %u procs",
                         OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs);
+    }

     if (NULL == mca_pmix_native_component.uri) {
         /* no server available, so just return */
@@ -545,9 +553,17 @@

     OBJ_RELEASE(cb);

-    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
-                        "%s pmix:native fence released",
-                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
+    if (2 == nprocs) {
+        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
+                        "%s pmix:native fence released on %u procs %s and %s",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs,
+                        OPAL_NAME_PRINT(procs[0]),
+                        OPAL_NAME_PRINT(procs[1]));
+    } else {
+        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
+                        "%s pmix:native fence released on %u procs",
+                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs);
+    }

     return OPAL_SUCCESS;
 }
Index: ompi/mpi/c/comm_disconnect.c
===================================================================
--- ompi/mpi/c/comm_disconnect.c        (revision 32594)
+++ ompi/mpi/c/comm_disconnect.c        (working copy)
@@ -10,6 +10,8 @@
  * Copyright (c) 2004-2005 The Regents of the University of California.
  *                         All rights reserved.
  * Copyright (c) 2006      Cisco Systems, Inc.  All rights reserved.
+ * Copyright (c) 2014      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  * 
  * Additional copyrights may follow
@@ -61,14 +63,13 @@

     OPAL_CR_ENTER_LIBRARY();

+    /* Always invoke coll_barrier in order to avoid a race condition in pmix */
+    (*comm)->c_coll.coll_barrier(*comm, (*comm)->c_coll.coll_barrier_module);
     if ( OMPI_COMM_IS_DYNAMIC(*comm)) {
         if (OMPI_SUCCESS != ompi_dpm.disconnect (*comm)) {
             ret = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME);
         }
     }
-    else {
-        (*comm)->c_coll.coll_barrier(*comm, (*comm)->c_coll.coll_barrier_module);
-    }

     ompi_comm_free(comm);
