Hi,
We observe a hang when running the multi-threading support test "latency.c"
(attached to this report), which uses MPI_THREAD_MULTIPLE.
The hang occurs immediately at the beginning of the test and is reproducible
on the v1.8 release branch.
The command line to reproduce the behavior is:
$ mpirun --map-by node --bind-to core -display-map -np 2 -mca pml ob1 -mca
btl tcp,self ./thread-tests-1.1/latency
The last commit with which the hang does not reproduce is:
commit e4d4266d9c69e
The problem appears starting with the following commit:
commit 09b867374e9618007b81bfaf674ec6df04548bed
Author: Jeff Squyres <[email protected]>
Date: Fri Oct 31 12:42:50 2014 -0700
Revert most of open-mpi/ompi@6ef938de3fa9ca0fed2c5bcb0736f65b0d8803af
Is this expected behavior? In other words, should we assume that no stable
release in the 1.8.x series will be able to use MPI_THREAD_MULTIPLE, even
with just the TCP and SM BTLs?
Please advise.
Thanks,
Alina.
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (C) 2007 University of Chicago
* See COPYRIGHT notice in top-level directory.
*/
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#define MAXSIZE 1024
#define NTIMES 1000
/* Measures concurrent latency (for short messages). Each even rank i
sends to rank i+1 and gets a reply. Run on 2 nodes (with multiple processes).
*/
int main(int argc,char *argv[])
{
    int rank, nprocs, src, dest, tag, i, size, incr;
    double stime, etime, ttime;
    char *sendbuf, *recvbuf;
    int provided;
    // MPI_Init(&argc,&argv);
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
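    /* The test requires full multi-threading support; abort if the
       library provides a lower thread level. */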
    if (provided != MPI_THREAD_MULTIPLE) {
        printf("Thread multiple needed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    sendbuf = (char *) malloc(MAXSIZE);
    if (!sendbuf) {
        printf("Cannot allocate buffer\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    recvbuf = (char *) malloc(MAXSIZE);
    if (!recvbuf) {
        printf("Cannot allocate buffer\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    incr = 16;
    if (rank == 0) printf("Size (bytes) \t Time (us)\n");
    MPI_Barrier(MPI_COMM_WORLD);
    /* All even ranks send to (and recv from) rank i+1 many times */
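    /* Tag scheme: within each message size, iteration i sends the request
       with tag i and expects the reply with tag i+1 (the tag is bumped
       between the send and the matching receive on both sides). The
       message size steps by 16 bytes up to 256 bytes, then by 64 bytes. */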
    if ((rank % 2) == 0) { /* even */
        dest = rank + 1;
        for (size=0; size<=MAXSIZE; size+=incr) {
            tag = 0;
            stime = MPI_Wtime();
            for (i=0; i<NTIMES; i++) {
                MPI_Send(sendbuf, size, MPI_BYTE, dest, tag, MPI_COMM_WORLD);
                tag++;
                MPI_Recv(recvbuf, size, MPI_BYTE, dest, tag, MPI_COMM_WORLD,
                         MPI_STATUS_IGNORE);
            }
            etime = MPI_Wtime();
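            /* Report the average one-way latency: the measured interval
               covers NTIMES round trips, i.e. 2*NTIMES messages. */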
            ttime = (etime - stime)/(2*NTIMES);
            if (rank == 0) printf("%d \t %f\n", size, ttime*1000000);
            if (size == 256) incr = 64;
        }
    }
    else { /* odd */
        src = rank - 1;
        for (size=0; size<=MAXSIZE; size+=incr) {
            tag = 0;
            for (i=0; i<NTIMES; i++) {
                MPI_Recv(recvbuf, size, MPI_BYTE, src, tag, MPI_COMM_WORLD,
                         MPI_STATUS_IGNORE);
                tag++;
                MPI_Send(sendbuf, size, MPI_BYTE, src, tag, MPI_COMM_WORLD);
            }
            if (size == 256) incr = 64;
        }
    }
    free(sendbuf);
    free(recvbuf);
    MPI_Finalize();
    return 0;
}