I have a problem with my MPI code: it hangs when it is run on multiple nodes, but it completes successfully when run on a single node. I am not sure how to debug this. Can someone help me debug this issue?
Program usage:
mpicc -o string string.cpp
mpirun -np 4 -npernode 2 -hostfile hosts ./string 12 0.1 0.9 10 2
MPI_Reduce Hangs in 2nd iteration: (Output cout statements from my program) 1st Iteration (Timestep 1) ----------------------------------------------------- 0 Waiting for MPI_Reduce() 0 Done Waiting for MPI_Reduce() 1 Waiting for MPI_Reduce() 1 Done Waiting for MPI_Reduce() 2 Waiting for MPI_Reduce() 2 Done Waiting for MPI_Reduce() 3 Waiting for MPI_Reduce() 3 Done Waiting for MPI_Reduce() 0 Sending to right task = 1 0 Receiving from right task = 1 1 Receiving from left task = 0 1 Sending to left task = 0 1 Sending to right task = 2 1 Receiving from right task = 2 2 Receiving from left task = 1 2 Sending to left task = 1 2 Sending to right task = 3 2 Receiving from right task = 3 3 Receiving from left task = 2 3 Sending to left task = 2 2nd Iteration (Timestep 2) ----------------------------------------------------- 0 Waiting for MPI_Reduce() 1 Waiting for MPI_Reduce() 1 Done Waiting for MPI_Reduce() 2 Waiting for MPI_Reduce() 3 Waiting for MPI_Reduce() My Code: #include <iostream> #include <vector> #include <stdio.h> #include <stdlib.h> #include "mpi.h" #define MASTER 0 int RtoL = 10; int LtoR = 20; int main ( int argc, char **argv ) { int nprocs, taskid; FILE *f = NULL; int left, right, i_start, i_end; float sum = 0; MPI_Status status; float *y, *yold; float *v, *vold; // const int NUM_MASSES = 1000; // const float Ktension = 0.1; // const float Kdamping = 0.9; // const float duration = 10.0; #if 0 if ( argc != 5 ) { std::cout << "usage: " << argv[0] << " NUM_MASSES durationInSecs Ktension Kdamping\n"; return 2; } #endif int NUM_MASSES = atoi ( argv[1] ); float duration = atof ( argv[2] ); float Ktension = atof ( argv[3] ); float Kdamping = atof ( argv[4] ); const int PICKUP_POS = NUM_MASSES / 7; const int OVERSAMPLING = 16; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&nprocs); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); if (taskid == 0) { f = fopen ( "rstring.raw", "wb" ); if (!f) { std::cout << "can't open output file\n"; return 1; } } y = new 
float[NUM_MASSES]; yold = new float[NUM_MASSES]; v = new float[NUM_MASSES]; for (int i = 0; i < NUM_MASSES; i++ ) { v[i] = 0.0f; yold[i] = y[i] = 0.0f; if (i == NUM_MASSES/2 ) yold[i] = 1.0; } if (taskid == 0) { left = -1; right = 1; } else if (taskid == nprocs - 1) { left = taskid - 1; right = -1; } else { left = taskid - 1; right = taskid + 1; } i_start = taskid * (NUM_MASSES/nprocs); i_end = i_start + (NUM_MASSES/nprocs); int numIters = duration * 44100 * OVERSAMPLING;; if (argc == 6) { numIters = atoi(argv[5]); } for ( int t = 0; t < numIters; t++ ) { float sum = 0; float gsum = 0; for ( int i = i_start; i < i_end; i++ ) { if ( i == 0 || i == NUM_MASSES-1 ) { } else { float accel = Ktension * (yold[i+1] + yold[i-1] - 2*yold[i]); v[i] += accel; v[i] *= Kdamping; y[i] = yold[i] + v[i]; sum += y[i]; } } std::cout << taskid << " Waiting for MPI_Reduce()" << std::endl; MPI_Reduce(&sum, &gsum, 1, MPI_FLOAT, MPI_SUM, MASTER, MPI_COMM_WORLD); std::cout << taskid << " Done Waiting for MPI_Reduce()" << std::endl; if (taskid != 0) { MPI_Recv(&y[i_start-1], 1, MPI_FLOAT, left, LtoR, MPI_COMM_WORLD, &status); std::cout << taskid << " Receiving from left task = " << left << std::endl; MPI_Send(&y[i_start], 1, MPI_FLOAT, left, RtoL, MPI_COMM_WORLD); std::cout << taskid << " Sending to left task = " << left << std::endl; } if (taskid != nprocs - 1) { MPI_Send(&y[i_end-1],1, MPI_FLOAT, right, LtoR, MPI_COMM_WORLD); std::cout << taskid <<" Sending to right task = " << right << std::endl; MPI_Recv(&y[i_end], 1, MPI_FLOAT, right, RtoL, MPI_COMM_WORLD, &status); std::cout << taskid <<" Receiving from right task = " << right << std::endl; } //printf("After Reduce task = %d yold = %f %f %f %f\n", taskid,yold[0], yold[1], yold[2], yold[3]); //printf("After Reduce task = %d y = %f %f %f %f\n", taskid, y[0], y[1], y[2], y[3]); //printf("After Reduce task = %d v = %f %f %f %f\n", taskid, v[0], v[1], v[2], v[3]); float *tmp = y; y = yold; yold = tmp; if (taskid == 0) { //std::cout<< "sum 
= " << gsum << std::endl; if ( t % OVERSAMPLING == 0 ) { fwrite ( &gsum, sizeof(float), 1, f ); } } } if (taskid == 0) { fclose ( f ); } MPI_Finalize(); }