Hi,

 I have verified that the Open MPI version is 1.2.7 on all the nodes, and that
`ompi_info | grep thread` reports "Thread support: posix (mpi: no, progress: no)"
on these machines.

 I get the error with and without -mca oob_tcp_listen_mode listen_thread.
Sometimes, the startup takes too long with the listen_thread enabled and I
have to resort to killing and restarting the program.

 Would the following matter in any way?
 1. The head node (node where I start the mpi process) being a part of the
cluster
 2. The head node also being the root node (node with vpid 0)
 3. The head node not being a part of the cluster

 I am currently trying the configurations listed above, as well as other
combinations such as tweaking -mca oob_tcp_thread_max_size.

 The test program I run is the following:

#include <boost/mpi.hpp>
#include <iostream>

// Boost.MPI smoke test: every process prints its host name and rank, then all
// ranks are summed with an all-reduce and rank 0 prints the total.
// Returns 0 on success, -1 if the MPI environment reports it is not initialized.
int
main(int argc, char **argv)
{
  // Initialize MPI environment
  boost::mpi::environment env(argc, argv);
  if (!env.initialized()) {
    std::cout << "Could not initialize MPI environment!" << std::endl;
    return -1;
  }
  boost::mpi::communicator world;

  // Find out my identity in the default communicator
  const int myrank = world.rank();

  // Find out how many processes there are in the default communicator
  const int ntasks = world.size();

  // Zero-fill the buffer and leave the last byte untouched: gethostname() is
  // not required to null-terminate a truncated name, and its result is
  // streamed as a C string below.
  char hn[256] = {0};
  gethostname(hn, sizeof(hn) - 1);

  std::cout << hn << " is node " << myrank << " of " << ntasks << std::endl;

  const int allranks = boost::mpi::all_reduce(world, myrank, std::plus<int>());

  world.barrier();
  if (myrank == 0) {
    std::cout << "ranks sum to " << allranks << std::endl;
  }

  // finalize MPI environment when env is destructed
  return 0;
}


 I also tried a version without Boost::MPI with the same results.

 #include <stdio.h>
#include <mpi.h>


int main (int argc, char* argv[])
{
  int rank, size;

  MPI_Init (&argc, &argv);      /* starts MPI */
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);        /* get current process id */
  MPI_Comm_size (MPI_COMM_WORLD, &size);        /* get number of processes
*/
  char hn[256];
  gethostname(hn, 255);
  printf( "%s is node %d of %d\n", hn, rank, size );

  int all_ranks;
  int count[1024] = {1};
  MPI_Reduce_scatter (&rank,&all_ranks, count, MPI_INT, MPI_SUM,
MPI_COMM_WORLD);
  MPI_Barrier (MPI_COMM_WORLD);
  if(rank == 0 )
    printf( "ranks sum to %d\n",all_ranks);
  MPI_Finalize();
return 0;
}

Regards,

Prasanna.

Reply via email to