Oh... Thanks for the fast answer! Now I understand where the error was. At least the code runs now! But the matrix is calculated incorrectly... Now I need to think about my Fox algorithm implementation.
2015-09-24 3:54 GMT+03:00 Gilles Gouaillardet <gil...@rist.or.jp>:

> Hi,
>
> at line 171
>
>     MPI_Gather(&cBufProc[i*matrixSize], blockSize, MPI_DOUBLE, 0,
>                tmpVar[i*matrixSize], MPI_DOUBLE, 0, rowComm);
>
> but per the man page
>
>     int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
>                    void *recvbuf, int recvcount, MPI_Datatype recvtype, int root,
>                    MPI_Comm comm)
>
> so you have
>
>     recvbuf   = 0 (!)
>     recvcount = tmpVar[i*matrixSize]
>
> I guess you meant to have recvcount = blockSize.
> That being said, tmpVar[i*matrixSize] is an int, and it should likely be
> a double *.
>
> Cheers,
>
> Gilles
>
>
> On 9/24/2015 8:13 AM, Surivinta Surivinta wrote:
>
> Hi everybody!
> I am trying to implement the Fox algorithm with MPI, but I get some errors (see below).
> Can someone explain how to fix this, or point me in the right direction?
> The source code is attached to this letter.
>
> Errors:
> [estri_mobile:6337] *** An error occurred in MPI_Gather
> [estri_mobile:6337] *** reported by process [1826816001,0]
> [estri_mobile:6337] *** on communicator MPI COMMUNICATOR 4 SPLIT FROM 3
> [estri_mobile:6337] *** MPI_ERR_COUNT: invalid count argument
> [estri_mobile:6337] *** MPI_ERRORS_ARE_FATAL (processes in this
> communicator will now abort,
> [estri_mobile:6337] *** and potentially your MPI job)
>
> --
> Best regards.
>
> _______________________________________________
> users mailing list
> us...@open-mpi.org
> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users
> Link to this post:
> http://www.open-mpi.org/community/lists/users/2015/09/27656.php
>
> _______________________________________________
> users mailing list
> us...@open-mpi.org
> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/users
> Link to this post:
> http://www.open-mpi.org/community/lists/users/2015/09/27658.php

--
Best regards.
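For reference, the corrected call Gilles describes would look roughly like this, with the receive buffer and receive count in their proper positions (a sketch only; it assumes tmpVar is a double * receive buffer that is valid on the root of rowComm, as in the attached code):

    /* sendbuf, sendcount, sendtype / recvbuf, recvcount, recvtype / root, comm */
    MPI_Gather(&cBufProc[i*matrixSize], blockSize, MPI_DOUBLE,
               &tmpVar[i*matrixSize],   blockSize, MPI_DOUBLE,
               0, rowComm);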
#ifndef __SOMEHEAD_H_
#define __SOMEHEAD_H_

typedef unsigned int uint;

void gridCommCr();
void dataInit(double* aMatrix, double* bMatrix, int matrixSize);
void printMa(double* curMatrix, int numbRow, int numbCol);
void matrixScatter(double* curMatrix, double* curBufBlock, int maSize, int blockSize);
void delivData(double* aMatrix, double* bMatrix, double* aMatrixBlock, double* bBufProc,
               int matrixSize, int blockSize);
void bBlSendRecv(double* bBufProc, int blockSize);
void calcParal(double* aMatrix, double* aMatrixBlock, double* bBufProc, double* cBufProc,
               int blockSize);
void collect(double* cMatrix, double* cBufProc, int matrixSize, int blockSize);
void blockAbroadcast(int iter, double* aBufProc, double* aMatrixBlock, int blockSize);
void blMulti(double* aBlock, double* bBlock, double* cBlock, int matrixSize);
void serialCalc(double* aMatrix, double* bMatrix, double* cMatrix, int matrixSize);
void blockPrint(double* bufBlock, int blockSize, const char str[]);

#endif // __SOMEHEAD_H_
#include <stdio.h>
#include <mpi.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <math.h>

#include "somehead.h"

int size;                  // number of processes
int rank;                  // rank of the current process
int gridSz;                // grid size (size must equal gridSz*gridSz)
int gridCoord[2];          // coordinates of this process in the grid

double *aMatrix;           // full matrix A
double *bMatrix;           // full matrix B
double *cMatrix;           // full matrix C
double *aMatrixBlock;      // block of matrix A stored by this process
double *aBufProc;          // working block of matrix A in the current process
double *bBufProc;          // block of matrix B in the current process
double *cBufProc;          // block of matrix C in the current process

static MPI_Comm gridComm;  // 2D grid communicator
static MPI_Comm rowComm;   // row communicator
static MPI_Comm colComm;   // column communicator

///////////////////////////////////////////////////////////////////////////
// init data in matrices
void dataInit(double* aMatrix, double* bMatrix, int matrixSize)
{
    int value = 1;
    uint i, j;
    srand(value);
    for (i = 0; i < matrixSize; i++){
        for (j = 0; j < matrixSize; j++){
            aMatrix[i*matrixSize+j] = 1.0 + rand() % 5;
            bMatrix[i*matrixSize+j] = 1.0 + rand() % 7;
        }
    }
}

///////////////////////////////////////////////////////////////////////////
// create comm for 2D grid coord
// define coord of proc in current grid
// make comm for row and column (MPI_Cart_create)
void gridCommCr()
{
    int dimSize[2];  // number of processes in each grid dimension
    int period[2];   // 1 - periodic dimension, 0 - not
    int subDim[2];   // 1 - dimension kept in the subgrid, else 0

    dimSize[0] = gridSz;
    dimSize[1] = gridSz;
    period[0] = 0;
    period[1] = 0;

    MPI_Dims_create(size, 2, dimSize);
    MPI_Cart_create(MPI_COMM_WORLD, 2, dimSize, period, 1, &gridComm);
    MPI_Cart_coords(gridComm, rank, 2, gridCoord);

    subDim[0] = 0;  // vary along dimension 1: processes of one row
    subDim[1] = 1;
    MPI_Cart_sub(gridComm, subDim, &rowComm);

    subDim[0] = 1;  // vary along dimension 0: processes of one column
    subDim[1] = 0;
    MPI_Cart_sub(gridComm, subDim, &colComm);

    // printf("Comm created!");
}

///////////////////////////////////////////////////////////////////////////
void delivData(double* aMatrix, double* bMatrix, double* aMatrixBlock, double* bBufProc,
               int matrixSize, int blockSize)
{
    matrixScatter(aMatrix, aMatrixBlock, matrixSize, blockSize);
    matrixScatter(bMatrix, bBufProc, matrixSize, blockSize);
}

void matrixScatter(double* curMatrix, double* curBufBlock, int maSize, int blockSize)
{
    uint i;
    double *tempMaRow = (double*) malloc((blockSize*maSize)*sizeof(double)); // one block-row of the matrix

    if (gridCoord[1] == 0){
        MPI_Scatter(curMatrix, blockSize*maSize, MPI_DOUBLE,
                    tempMaRow, blockSize*maSize, MPI_DOUBLE, 0, colComm);
    }
    for (i = 0; i < blockSize; i++){
        MPI_Scatter(&tempMaRow[i*maSize], blockSize, MPI_DOUBLE,
                    &(curBufBlock[i*blockSize]), blockSize, MPI_DOUBLE, 0, rowComm);
    }
    free(tempMaRow);
}

///////////////////////////////////////////////////////////////////////////
void calcParal(double* aMatrix, double* aMatrixBlock, double* bBufProc, double* cBufProc,
               int blockSize)
{
    int iter;
    for (iter = 0; iter < gridSz; iter++){
        blockAbroadcast(iter, aMatrix, aMatrixBlock, blockSize);
        blMulti(aMatrix, bBufProc, cBufProc, blockSize);
        bBlSendRecv(bBufProc, blockSize);
    }
}

void blockAbroadcast(int iter, double* aBufProc, double* aMatrixBlock, int blockSize)
{
    uint i;
    int tmpVar = (gridCoord[0] + iter) % gridSz;
    if (gridCoord[1] == tmpVar){
        for (i = 0; i < blockSize*blockSize; i++){
            aBufProc[i] = aMatrixBlock[i];
        }
    }
    MPI_Bcast(aBufProc, blockSize*blockSize, MPI_DOUBLE, tmpVar, rowComm);
}

void bBlSendRecv(double* bBufProc, int blockSize)
{
    MPI_Status status;
    int nextProc = gridCoord[0] + 1;
    if (gridCoord[0] == gridSz - 1){
        nextProc = 0;
    }
    int pervProc = gridCoord[0] - 1;
    if (gridCoord[0] == 0){
        pervProc = gridSz - 1;
    }
    MPI_Sendrecv_replace(bBufProc, blockSize*blockSize, MPI_DOUBLE, nextProc, 0,
                         pervProc, 0, colComm, &status);
}

void blMulti(double* aBlock, double* bBlock, double* cBlock, int matrixSize)
{
    serialCalc(aBlock, bBlock, cBlock, matrixSize);
}

void serialCalc(double* aMatrix, double* bMatrix, double* cMatrix, int matrixSize)
{
    uint i, j, k;
    for (i = 0; i < matrixSize; i++){
        for (j = 0; j < matrixSize; j++){
            for (k = 0; k < matrixSize; k++){
                cMatrix[i*matrixSize+j] += aMatrix[i*matrixSize+k]*bMatrix[k*matrixSize+j];
            }
        }
    }
}

///////////////////////////////////////////////////////////////////////////
void collect(double* cMatrix, double* cBufProc, int matrixSize, int blockSize)
{
    double *tmpVar;
    tmpVar = malloc((matrixSize*blockSize)*sizeof(double));
    uint i;
    for (i = 0; i < blockSize; i++){
        MPI_Gather(&cBufProc[i*matrixSize], blockSize, MPI_DOUBLE,
                   &tmpVar[i*matrixSize], blockSize, MPI_DOUBLE, 0, rowComm);
    }
    if (gridCoord[1] == 0){
        MPI_Gather(tmpVar, blockSize*matrixSize, MPI_DOUBLE,
                   cMatrix, blockSize*matrixSize, MPI_DOUBLE, 0, colComm);
    }
    free(tmpVar);
}

///////////////////////////////////////////////////////////////////////////
void blockPrint(double* bufBlock, int blockSize, const char str[])
{
    uint i;
    MPI_Barrier(MPI_COMM_WORLD);
    for (i = 0; i < size; i++){
        if (rank == 0){
            printf("%s \n", str);
        }
        if (rank == i){
            printf("ProcRank = %d \n", rank);
            printMa(bufBlock, blockSize, blockSize);
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
}

void printMa(double* curMatrix, int numbRow, int numbCol)
{
    uint i, j;
    for (i = 0; i < numbRow; i++){
        for (j = 0; j < numbCol; j++){
            printf("%7.4f \t", curMatrix[i*numbRow+j]);
        }
        printf("\n");
    }
}

///////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    int matrixSize = 4;   // size of the square matrices
    int blockSize;        // size of one block (matrixSize / gridSz)
    uint i;
    //double start, end;  // reserved

    setvbuf(stdout, 0, _IONBF, 0); // buffering off

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    gridSz = sqrt((double) size);
    blockSize = matrixSize / gridSz;

    aBufProc = (double*) malloc((blockSize*blockSize)*sizeof(double));
    bBufProc = (double*) malloc((blockSize*blockSize)*sizeof(double));
    cBufProc = (double*) malloc((blockSize*blockSize)*sizeof(double));
    aMatrixBlock = (double*) malloc((blockSize*blockSize)*sizeof(double));

    if (rank == 0){
        if (size != gridSz*gridSz){
            printf("Grid size must be = countCPU*countCPU\n");
            MPI_Abort(MPI_COMM_WORLD, 911);
            MPI_Finalize();
            return 0; // I know this is bad, but ...
        }
    }

    aMatrix = (double*) malloc((matrixSize*matrixSize)*sizeof(double));
    bMatrix = (double*) malloc((matrixSize*matrixSize)*sizeof(double));
    cMatrix = (double*) malloc((matrixSize*matrixSize)*sizeof(double));

    gridCommCr();
    dataInit(aMatrix, bMatrix, matrixSize);
    MPI_Bcast(&matrixSize, 1, MPI_INT, 0, MPI_COMM_WORLD);

    for (i = 0; i < blockSize*blockSize; i++)
        cBufProc[i] = 0.0;

    delivData(aMatrix, bMatrix, aMatrixBlock, bBufProc, matrixSize, blockSize);
    calcParal(aMatrix, aMatrixBlock, bBufProc, cBufProc, blockSize);
    collect(cMatrix, cBufProc, matrixSize, blockSize);

    //printf("Matrix A:\n");
    //printMa(aMatrix, matrixSize, matrixSize);
    //printf("Matrix B:\n");
    //printMa(bMatrix, matrixSize, matrixSize);
    //printf("Matrix C:\n");
    //printMa(cBufProc, matrixSize, matrixSize);

    blockPrint(aMatrix, matrixSize, "Matrix A");
    blockPrint(bMatrix, matrixSize, "Matrix B");
    blockPrint(cMatrix, matrixSize, "Matrix C");

    MPI_Finalize();
    return 0;
}
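One way to track down where the parallel result goes wrong is to compare the gathered matrix against the existing serialCalc() reference on rank 0. A minimal sketch, assuming it is placed in main() right after collect(); the cSerial buffer is introduced here purely for illustration and is not part of the original program:

    /* Hypothetical check (not in the original code): compute the product
     * serially on rank 0 and compare it element by element with the matrix
     * gathered by collect(). */
    if (rank == 0){
        double *cSerial = (double*) malloc((matrixSize*matrixSize)*sizeof(double));
        int k;
        for (k = 0; k < matrixSize*matrixSize; k++) cSerial[k] = 0.0;
        serialCalc(aMatrix, bMatrix, cSerial, matrixSize);
        for (k = 0; k < matrixSize*matrixSize; k++){
            if (fabs(cMatrix[k] - cSerial[k]) > 1e-9){
                printf("mismatch at [%d][%d]: parallel=%f serial=%f\n",
                       k / matrixSize, k % matrixSize, cMatrix[k], cSerial[k]);
            }
        }
        free(cSerial);
    }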