Hi,

I'm experimenting with the MPI-2 functions for supporting the client/server 
model in MPI (i.e. server and client are independently created MPI jobs 
establishing an intercommunicator between them at run time, see section 5.4 
"Establishing Communication" of the MPI-2 standard document) and it looks as 
if MPI_Comm_connect() always fails...

That is, if I compile simple client/server programs as follows (for the 
source, see below):

  mpicc aserver.c -o aserver
  mpicc aclient.c -o aclient

I first start the server with:

  mpiexec -n 1 ./aserver

it prints:

  Server port = '0.1.0:2000'

and then start the client as follows (providing the port name printed by the 
server):

  mpiexec -n 1 ./aclient '0.1.0:2000'

I get the following error with the client (the server continues to run 
unperturbed):

  [linux15:27660] [0,1,0] ORTE_ERROR_LOG: Not found in file dss/dss_unpack.c at 
line 209
  [linux15:27660] [0,1,0] ORTE_ERROR_LOG: Not found in file 
communicator/comm_dyn.c at line 186
  [linux15:27660] *** An error occurred in MPI_Comm_connect
  [linux15:27660] *** on communicator MPI_COMM_WORLD
  [linux15:27660] *** MPI_ERR_INTERN: internal error
  [linux15:27660] *** MPI_ERRORS_ARE_FATAL (goodbye)

Note that both are started on the same machine (hostname linux15).

The same programs seem to work fine with mpich2 (ch3:sock device), so my 
question is: am I doing something wrong, or is there a bug in OpenMPI?

I use OpenMPI version 1.2.5 configured as follows:

   ./configure --prefix=/usr/local/openmpi-1.2.5 --disable-mpi-f77 
--disable-mpi-f90 --disable-mpi-cxx --disable-cxx-exceptions 
--with-io-romio-flags=--with-file-system=ufs+nfs

on a Linux x86_64 machine running Fedora Core 4.

Thanks,

Martin Audet



/* aserver.c */

#include <stdio.h>
#include <mpi.h>

/*
 * Server side of the connect/accept test.
 *
 * Takes no command-line arguments. Opens an MPI port, prints its name on
 * rank 0, accepts a single client connection on that port (rank 0 is the
 * root of the accept), then closes the port, disconnects and exits.
 *
 * Returns 0 on normal completion; aborts the whole job with code 1 if
 * rank 0 was given unexpected arguments.
 */
int main(int argc, char **argv)
{
   int      comm_rank;
   char     port_name[MPI_MAX_PORT_NAME];
   MPI_Comm intercomm;
   int      ok_flag;

   MPI_Init(&argc, &argv);

   MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);

   /* Only rank 0 checks the command line; its verdict is then broadcast so
      that every rank takes the same branch below. */
   ok_flag = (comm_rank != 0) || (argc == 1);
   MPI_Bcast(&ok_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

   if (!ok_flag) {
      if (comm_rank == 0) {
         fprintf(stderr,"Usage: %s\n",argv[0]);
      }
      MPI_Abort(MPI_COMM_WORLD, 1);
   }

   MPI_Open_port(MPI_INFO_NULL, port_name);

   if (comm_rank == 0) {
      printf("Server port = '%s'\n", port_name);
      /* MPI_Comm_accept() blocks until a client connects. Flush now so the
         port name is visible even when stdout is not a terminal and is
         therefore not line-buffered. */
      fflush(stdout);
   }
   MPI_Comm_accept(port_name, MPI_INFO_NULL, 0, MPI_COMM_WORLD, &intercomm);

   MPI_Close_port(port_name);

   MPI_Comm_disconnect(&intercomm);

   MPI_Finalize();

   return 0;
}


/* aclient.c */

#include <stdio.h>
#include <unistd.h>

#include <mpi.h>

int main(int argc, char **argv)
{
   int      comm_rank,comm_size;
   int      ok_flag;
   MPI_Comm intercomm;

   MPI_Init(&argc, &argv);

   MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank);
   MPI_Comm_size(MPI_COMM_WORLD, &comm_size);

   ok_flag = (comm_rank != 0)  || ((argc == 2)  &&  argv[1]  &&  (*argv[1] != 
'\0'));
   MPI_Bcast(&ok_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);

   if (!ok_flag) {
      if (comm_rank == 0) {
         fprintf(stderr,"Usage: %s mpi_port\n", argv[0]);
      }
      MPI_Abort(MPI_COMM_WORLD, 1);
   }

   while (MPI_Comm_connect((comm_rank == 0) ? argv[1] : 0, MPI_INFO_NULL, 0, 
MPI_COMM_WORLD, &intercomm) != MPI_SUCCESS) {
      if (comm_rank == 0) {
         printf("MPI_Comm_connect() failled, sleeping and retrying...\n");
      }
      sleep(1);
   }

   MPI_Comm_disconnect(&intercomm);

   MPI_Finalize();

   return 0;
}

Reply via email to