I have simple MPI program that sends data to processor rank 0. The
communication works well but when I run the program on more than 2
processors (-np 4) the extra receivers waiting for data run on > 90%
CPU load. I understand MPI_Recv() is a blocking operation, but why
does it consume so much CPU compared to a regular system read()?
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
void process_sender(int);
void process_receiver(int);
int main(int argc, char* argv[])
{
int rank;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("Processor %d (%d) initialized\n", rank, getpid());
if( rank == 1 )
process_sender(rank);
else
process_receiver(rank);
MPI_Finalize();
}
void process_sender(int rank)
{
int i, j, size;
float data[100];
MPI_Status status;
printf("Processor %d initializing data...\n", rank);
for( i = 0; i < 100; ++i )
data[i] = i;
MPI_Comm_size(MPI_COMM_WORLD, &size);
printf("Processor %d sending data...\n", rank);
MPI_Send(data, 100, MPI_FLOAT, 0, 55, MPI_COMM_WORLD);
printf("Processor %d sent data\n", rank);
}
void process_receiver(int rank)
{
int count;
float value[200];
MPI_Status status;
printf("Processor %d waiting for data...\n", rank);
MPI_Recv(value, 200, MPI_FLOAT, MPI_ANY_SOURCE, 55,
MPI_COMM_WORLD, &status);
printf("Processor %d Got data from processor %d\n", rank,
status.MPI_SOURCE);
MPI_Get_count(&status, MPI_FLOAT, &count);
printf("Processor %d, Got %d elements\n", rank, count);
}