Hi,

My colleague tested the MPI_IN_PLACE support for MPI_ALLTOALL, MPI_ALLTOALLV, and MPI_ALLTOALLW that was implemented in the Open MPI trunk two months ago. He found three bugs and created a patch.
The bugs found are:

(A) Missing MPI_IN_PLACE support in the self COLL component

The attached alltoall-self-inplace.c fails with MPI_ERR_ARG. The self COLL component must also support MPI_IN_PLACE.

(B) Incorrect rcounts[] index

A trivial bug in the following code:

    for (i = 0, max_size = 0 ; i < size ; ++i) {
        size_t size = ext * rcounts[rank]; // should be rcounts[i]
        max_size = size > max_size ? size : max_size;
    }

This can cause a SEGV or a similar failure.

(C) For MPI_ALLTOALLV, the unit of displacements is the extent, not bytes

Although the unit of displacements is bytes for MPI_ALLTOALLW, the unit of displacements is the datatype extent for MPI_ALLTOALLV. MPI-2.2 (page 171) says:

    The outcome is as if each process sent a message to every other process with,
    MPI_Send(sendbuf + sdispls[i] · extent(sendtype), sendcounts[i], sendtype, i, ...),
    and received a message from every other process with a call to
    MPI_Recv(recvbuf + rdispls[i] · extent(recvtype), recvcounts[i], recvtype, i, ...).

I attached his patch (alltoall-inplace.patch) to fix these three bugs.

Takahiro Kawashima,
MPI development team,
Fujitsu
Index: ompi/mca/coll/self/coll_self_alltoall.c =================================================================== --- ompi/mca/coll/self/coll_self_alltoall.c (revision 28967) +++ ompi/mca/coll/self/coll_self_alltoall.c (working copy) @@ -37,6 +37,10 @@ struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { + if (MPI_IN_PLACE == sbuf) { + return MPI_SUCCESS; + } + return ompi_datatype_sndrcv(sbuf, scount, sdtype, rbuf, rcount, rdtype); } Index: ompi/mca/coll/self/coll_self_alltoallv.c =================================================================== --- ompi/mca/coll/self/coll_self_alltoallv.c (revision 28967) +++ ompi/mca/coll/self/coll_self_alltoallv.c (working copy) @@ -40,6 +40,11 @@ { int err; ptrdiff_t lb, rextent, sextent; + + if (MPI_IN_PLACE == sbuf) { + return MPI_SUCCESS; + } + err = ompi_datatype_get_extent(sdtype, &lb, &sextent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; Index: ompi/mca/coll/self/coll_self_alltoallw.c =================================================================== --- ompi/mca/coll/self/coll_self_alltoallw.c (revision 28967) +++ ompi/mca/coll/self/coll_self_alltoallw.c (working copy) @@ -39,6 +39,11 @@ { int err; ptrdiff_t lb, rextent, sextent; + + if (MPI_IN_PLACE == sbuf) { + return MPI_SUCCESS; + } + err = ompi_datatype_get_extent(sdtypes[0], &lb, &sextent); if (OMPI_SUCCESS != err) { return OMPI_ERROR; Index: ompi/mca/coll/basic/coll_basic_alltoallv.c =================================================================== --- ompi/mca/coll/basic/coll_basic_alltoallv.c (revision 28967) +++ ompi/mca/coll/basic/coll_basic_alltoallv.c (working copy) @@ -56,7 +57,7 @@ /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[rank]; + size_t size = ext * rcounts[i]; max_size = size > max_size ? 
size : max_size; } @@ -76,11 +77,11 @@ if (i == rank && rcounts[j]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j]); + tmp_buffer, (char *) rbuf + rdisps[j] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; } @@ -91,11 +92,11 @@ } else if (j == rank && rcounts[i]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i]); + tmp_buffer, (char *) rbuf + rdisps[i] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; } Index: ompi/mca/coll/basic/coll_basic_alltoallw.c =================================================================== --- ompi/mca/coll/basic/coll_basic_alltoallw.c (revision 28967) +++ ompi/mca/coll/basic/coll_basic_alltoallw.c (working copy) @@ -56,7 +57,7 @@ /* Find the largest receive amount */ for (i = 0, max_size = 0 ; i < size ; ++i) { ompi_datatype_type_extent (rdtypes[i], &ext); - ext *= rcounts[rank]; + ext *= rcounts[i]; max_size = ext > max_size ? 
ext : max_size; } Index: ompi/mca/coll/tuned/coll_tuned_alltoallv.c =================================================================== --- ompi/mca/coll/tuned/coll_tuned_alltoallv.c (revision 28967) +++ ompi/mca/coll/tuned/coll_tuned_alltoallv.c (working copy) @@ -71,7 +72,7 @@ /* Find the largest receive amount */ ompi_datatype_type_extent (rdtype, &ext); for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[rank]; + size_t size = ext * rcounts[i]; max_size = size > max_size ? size : max_size; } @@ -91,11 +92,11 @@ if (i == rank && rcounts[j]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j]); + tmp_buffer, (char *) rbuf + rdisps[j] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; } @@ -106,11 +107,11 @@ } else if (j == rank && rcounts[i]) { /* Copy the data into the temporary buffer */ err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i]); + tmp_buffer, (char *) rbuf + rdisps[i] * ext); if (MPI_SUCCESS != err) { goto error_hndl; } /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype, + err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); if (MPI_SUCCESS != err) { goto error_hndl; }
#include <stdio.h> #include <mpi.h> int main(int argc, char *argv[]) { int buf; int counts[] = {1}; int displs[] = {0}; MPI_Datatype types[] = {MPI_INT}; MPI_Init(&argc, &argv); buf = 1; MPI_Alltoall(MPI_IN_PLACE, 1, MPI_INT, &buf, 1, MPI_INT, MPI_COMM_SELF); if (buf != 1) { fprintf(stderr, "buf is not 1.\n"); } buf = 1; MPI_Alltoallv(MPI_IN_PLACE, counts, displs, MPI_INT, &buf, counts, displs, MPI_INT, MPI_COMM_SELF); if (buf != 1) { fprintf(stderr, "buf is not 1.\n"); } buf = 1; MPI_Alltoallw(MPI_IN_PLACE, counts, displs, types, &buf, counts, displs, types, MPI_COMM_SELF); if (buf != 1) { fprintf(stderr, "buf is not 1.\n"); } MPI_Finalize(); return 0; }