Hi again,

Thanks Pak for the link and for suggesting to start an "orted" daemon; by
doing so my client and server jobs were able to establish an intercommunicator
between them.

However, I modified my programs to perform an MPI_Allgather() of a single "int"
over the new intercommunicator to test communication a little, and I
encountered problems. I am now wondering if there is a problem in
MPI_Allgather() itself for intercommunicators. Note that the same programs run
without problems with mpich2 (ch3:sock).

For example, if I start orted as follows:

   orted --persistent --seed --scope public --universe univ1

and then start the server with three processes:

  mpiexec --universe univ1 -n 3 ./aserver

it prints:

  Server port = '0.2.0:2000'

Now if I start the client with two processes as follows (using the server port):

   mpiexec --universe univ1 -n 2 ./aclient '0.2.0:2000'

The server prints:

  intercomm_flag = 1
  intercomm_remote_size = 2
  rem_rank_tbl[2] = { 0 1}

which is the correct output. The client then prints:

  intercomm_flag = 1
  intercomm_remote_size = 3
  rem_rank_tbl[3] = { 0 1 2}
  [linux15:30895] *** An error occurred in MPI_Allgather
  [linux15:30895] *** on communicator
  [linux15:30895] *** MPI_ERR_TRUNCATE: message truncated
  [linux15:30895] *** MPI_ERRORS_ARE_FATAL (goodbye)
  mpiexec noticed that job rank 0 with PID 30894 on node linux15 exited on signal 15 (Terminated).

As you can see, the first messages are correct, but the client job terminates
with an error (and the server hangs).
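
If it would help, I suppose I could make the error returnable instead of fatal
and print it on the client side with something like this (an untested sketch,
using the same buffers as in the attached code):

   int  err;
   char msg[MPI_MAX_ERROR_STRING];
   int  len;

   MPI_Comm_set_errhandler(intercomm, MPI_ERRORS_RETURN);
   err = MPI_Allgather(&comm_rank,   1, MPI_INT,
                       rem_rank_tbl, 1, MPI_INT,
                       intercomm);
   if (err != MPI_SUCCESS) {
      MPI_Error_string(err, msg, &len);
      fprintf(stderr, "MPI_Allgather failed: %s\n", msg);
   }

but the MPI_ERR_TRUNCATE itself is what I don't understand.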

After re-reading the documentation about MPI_Allgather() over an
intercommunicator, I don't see anything wrong in my simple code. Also, if I run
the client and server processes under valgrind, I get a few messages like:

  ==29821== Syscall param writev(vector[...]) points to uninitialised byte(s)
  ==29821==    at 0x36235C2130: writev (in /lib64/libc-2.3.5.so)
  ==29821==    by 0x7885583: mca_btl_tcp_frag_send (in /home/publique/openmpi-1.2.5/lib/openmpi/mca_btl_tcp.so)
  ==29821==    by 0x788501B: mca_btl_tcp_endpoint_send (in /home/publique/openmpi-1.2.5/lib/openmpi/mca_btl_tcp.so)
  ==29821==    by 0x7467947: mca_pml_ob1_send_request_start_prepare (in /home/publique/openmpi-1.2.5/lib/openmpi/mca_pml_ob1.so)
  ==29821==    by 0x7461494: mca_pml_ob1_isend (in /home/publique/openmpi-1.2.5/lib/openmpi/mca_pml_ob1.so)
  ==29821==    by 0x798BF9D: mca_coll_basic_allgather_inter (in /home/publique/openmpi-1.2.5/lib/openmpi/mca_coll_basic.so)
  ==29821==    by 0x4A5069C: PMPI_Allgather (in /home/publique/openmpi-1.2.5/lib/libmpi.so.0.0.0)
  ==29821==    by 0x400EED: main (aserver.c:53)
  ==29821==  Address 0x40d6cac is not stack'd, malloc'd or (recently) free'd

in both the MPI_Allgather() and MPI_Comm_disconnect() calls, for both client
and server, with valgrind always reporting that the addresses in question are
"not stack'd, malloc'd or (recently) free'd".
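
My reading of the standard is that, over an intercommunicator, MPI_Allgather()
has each process receive recvcount elements from every process in the *remote*
group, so the receive buffer must hold intercomm_remote_size elements. That is
exactly what both programs below do:

   MPI_Comm_remote_size(intercomm, &intercomm_remote_size);
   rem_rank_tbl = malloc(intercomm_remote_size*sizeof(*rem_rank_tbl));
   MPI_Allgather(&comm_rank,   1, MPI_INT,  /* send my rank to the remote group */
                 rem_rank_tbl, 1, MPI_INT,  /* one int from each remote process */
                 intercomm);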

So, is there a problem with MPI_Allgather() on intercommunicators, or am I
doing something wrong?

Thanks,

Martin


/* aserver.c */
#include <stdio.h>
#include <mpi.h>

#include <assert.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
   int       comm_rank,comm_size;
   char      port_name[MPI_MAX_PORT_NAME];
   MPI_Comm intercomm;
   int      ok_flag;

   int      intercomm_flag;
   int      intercomm_remote_size;
   int     *rem_rank_tbl;
   int      ii;

   MPI_Init(&argc, &argv);

   MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
   MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

   /* rank 0 validates the command line; its verdict is broadcast to all */
   ok_flag = (comm_rank != 0) || (argc == 1);
   MPI_Bcast(&ok_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

   if (!ok_flag) {
      if (comm_rank == 0) {
         fprintf(stderr,"Usage: %s\n",argv[0]);
      }
      MPI_Abort(MPI_COMM_WORLD, 1);
   }

   /* open a port and wait for the client job to connect */
   MPI_Open_port(MPI_INFO_NULL, port_name);

   if (comm_rank == 0) {
      printf("Server port = '%s'\n", port_name);
   }
   MPI_Comm_accept(port_name, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm);

   MPI_Close_port(port_name);

   MPI_Comm_test_inter(intercomm, &intercomm_flag);
   if (comm_rank == 0) {
      printf("intercomm_flag = %d\n", intercomm_flag);
   }
   assert(intercomm_flag != 0);
   MPI_Comm_remote_size(intercomm, &intercomm_remote_size);
   if (comm_rank == 0) {
      printf("intercomm_remote_size = %d\n", intercomm_remote_size);
   }
   rem_rank_tbl = malloc(intercomm_remote_size*sizeof(*rem_rank_tbl));
   /* each process receives one int from every process in the remote group */
   MPI_Allgather(&comm_rank,   1, MPI_INT,
                 rem_rank_tbl, 1, MPI_INT,
                 intercomm);
   if (comm_rank == 0) {
      printf("rem_rank_tbl[%d] = {", intercomm_remote_size);
      for (ii=0; ii < intercomm_remote_size; ii++) {
          printf(" %d", rem_rank_tbl[ii]);
      }
      printf("}\n");
   }
   free(rem_rank_tbl);

   MPI_Comm_disconnect(&intercomm);

   MPI_Finalize();

   return 0;
}

/* aclient.c */
#include <stdio.h>
#include <unistd.h>

#include <mpi.h>

#include <assert.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
   int      comm_rank,comm_size;
   int      ok_flag;
   MPI_Comm intercomm;

   int      intercomm_flag;
   int      intercomm_remote_size;
   int     *rem_rank_tbl;
   int      ii;

   MPI_Init(&argc, &argv);

   MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
   MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

   /* rank 0 validates the command line; its verdict is broadcast to all */
   ok_flag = (comm_rank != 0) ||
             ((argc == 2) && argv[1] && (*argv[1] != '\0'));
   MPI_Bcast(&ok_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

   if (!ok_flag) {
      if (comm_rank == 0) {
         fprintf(stderr,"Usage: %s mpi_port\n", argv[0]);
      }
      MPI_Abort(MPI_COMM_WORLD, 1);
   }

   /* the port name argument is only significant at the root rank */
   while (MPI_Comm_connect((comm_rank == 0) ? argv[1] : 0, MPI_INFO_NULL, 0,
                           MPI_COMM_WORLD, &intercomm) != MPI_SUCCESS) {
      if (comm_rank == 0) {
         printf("MPI_Comm_connect() failed, sleeping and retrying...\n");
      }
      sleep(1);
   }

   MPI_Comm_test_inter(intercomm, &intercomm_flag);
   if (comm_rank == 0) {
      printf("intercomm_flag = %d\n", intercomm_flag);
   }
   assert(intercomm_flag != 0);
   MPI_Comm_remote_size(intercomm, &intercomm_remote_size);
   if (comm_rank == 0) {
      printf("intercomm_remote_size = %d\n", intercomm_remote_size);
   }
   rem_rank_tbl = malloc(intercomm_remote_size*sizeof(*rem_rank_tbl));
   /* each process receives one int from every process in the remote group */
   MPI_Allgather(&comm_rank,   1, MPI_INT,
                 rem_rank_tbl, 1, MPI_INT,
                 intercomm);
   if (comm_rank == 0) {
      printf("rem_rank_tbl[%d] = {", intercomm_remote_size);
      for (ii=0; ii < intercomm_remote_size; ii++) {
          printf(" %d", rem_rank_tbl[ii]);
      }
      printf("}\n");
   }
   free(rem_rank_tbl);

   MPI_Comm_disconnect(&intercomm);

   MPI_Finalize();

   return 0;
}
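
For completeness, both programs were compiled with the Open MPI wrapper
compiler, e.g.:

   mpicc -g -o aserver aserver.c
   mpicc -g -o aclient aclient.c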
