George,
Copyright-added patch is attached.
I don't have my svn account so want someone to commit it.
All my reported issues are in the ALLTOALL(V|W) MPI_IN_PLACE code,
which was implemented two months ago for MPI-2.2 conformance.
Not so surprising.
P.S. Fujitsu does not yet signed the contribution agreement.
I must talk with the legal department again to sign it, sigh....
This patch is very trivial and so no issues will arise.
Thanks,
Takahiro Kawashima,
MPI development team,
Fujitsu
> Takahiro,
>
> Good catches. It's absolutely amazing that some of these errors lasted for so
> long before being discovered (especially the extent issue in the
> MPI_ALLTOALL). Please feel free to apply your patch and add the correct
> copyright at the beginning of all altered files.
>
> Thanks,
> George.
>
>
>
> On Sep 17, 2013, at 07:36 , "Kawashima, Takahiro"
> <[email protected]> wrote:
>
> > Hi,
> >
> > My colleague tested MPI_IN_PLACE for MPI_ALLTOALL, MPI_ALLTOALLV,
> > and MPI_ALLTOALLW, which was implemented two months ago in Open MPI
> > trunk. And he found three bugs and created a patch.
> >
> > Found bugs are:
> >
> > (A) Missing MPI_IN_PLACE support in self COLL component
> >
> > The attached alltoall-self-inplace.c fails with MPI_ERR_ARG.
> > self COLL component also must support MPI_IN_PLACE.
> >
> > (B) Incorrect rcount[] index
> >
> > A trivial bug in the following code.
> >
> > for (i = 0, max_size = 0 ; i < size ; ++i) {
> > size_t size = ext * rcounts[rank]; // should be rcounts[i]
> >
> > max_size = size > max_size ? size : max_size;
> > }
> >
> > This causes SEGV or something.
> >
> > (C) For MPI_ALLTOALLV, the unit of displacements is extent, not byte
> >
> > Though the unit of displacements is byte for MPI_ALLTOALLW,
> > the unit of displacements is extent for MPI_ALLTOALLV.
> >
> > MPI-2.2 (page 171) says:
> >
> > The outcome is as if each process sent a message to every
> > other process with,
> > MPI_Send(sendbuf + sdispls[i] · extent(sendtype),
> > sendcounts[i], sendtype, i, ...),
> > and received a message from every other process with a call to
> > MPI_Recv(recvbuf + rdispls[i] · extent(recvtype),
> > recvcounts[i], recvtype, i, ...).
> >
> > I attached his patch (alltoall-inplace.patch) to fix these three bugs.
Index: ompi/mca/coll/self/coll_self_alltoall.c
===================================================================
--- ompi/mca/coll/self/coll_self_alltoall.c (revision 29185)
+++ ompi/mca/coll/self/coll_self_alltoall.c (working copy)
@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -37,6 +38,10 @@
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
+ if (MPI_IN_PLACE == sbuf) {
+ return MPI_SUCCESS;
+ }
+
return ompi_datatype_sndrcv(sbuf, scount, sdtype,
rbuf, rcount, rdtype);
}
Index: ompi/mca/coll/self/coll_self_alltoallv.c
===================================================================
--- ompi/mca/coll/self/coll_self_alltoallv.c (revision 29185)
+++ ompi/mca/coll/self/coll_self_alltoallv.c (working copy)
@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -40,6 +41,11 @@
{
int err;
ptrdiff_t lb, rextent, sextent;
+
+ if (MPI_IN_PLACE == sbuf) {
+ return MPI_SUCCESS;
+ }
+
err = ompi_datatype_get_extent(sdtype, &lb, &sextent);
if (OMPI_SUCCESS != err) {
return OMPI_ERROR;
Index: ompi/mca/coll/self/coll_self_alltoallw.c
===================================================================
--- ompi/mca/coll/self/coll_self_alltoallw.c (revision 29185)
+++ ompi/mca/coll/self/coll_self_alltoallw.c (working copy)
@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -39,6 +40,11 @@
{
int err;
ptrdiff_t lb, rextent, sextent;
+
+ if (MPI_IN_PLACE == sbuf) {
+ return MPI_SUCCESS;
+ }
+
err = ompi_datatype_get_extent(sdtypes[0], &lb, &sextent);
if (OMPI_SUCCESS != err) {
return OMPI_ERROR;
Index: ompi/mca/coll/basic/coll_basic_alltoallv.c
===================================================================
--- ompi/mca/coll/basic/coll_basic_alltoallv.c (revision 29185)
+++ ompi/mca/coll/basic/coll_basic_alltoallv.c (working copy)
@@ -12,6 +12,7 @@
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
+ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -56,7 +57,7 @@
/* Find the largest receive amount */
ompi_datatype_type_extent (rdtype, &ext);
for (i = 0, max_size = 0 ; i < size ; ++i) {
- size_t size = ext * rcounts[rank];
+ size_t size = ext * rcounts[i];
max_size = size > max_size ? size : max_size;
}
@@ -76,11 +77,11 @@
if (i == rank && rcounts[j]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
- tmp_buffer, (char *) rbuf + rdisps[j]);
+ tmp_buffer, (char *) rbuf + rdisps[j] * ext);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
- err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype,
+ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
@@ -91,11 +92,11 @@
} else if (j == rank && rcounts[i]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
- tmp_buffer, (char *) rbuf + rdisps[i]);
+ tmp_buffer, (char *) rbuf + rdisps[i] * ext);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
- err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype,
+ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
Index: ompi/mca/coll/basic/coll_basic_alltoallw.c
===================================================================
--- ompi/mca/coll/basic/coll_basic_alltoallw.c (revision 29185)
+++ ompi/mca/coll/basic/coll_basic_alltoallw.c (working copy)
@@ -13,6 +13,7 @@
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
+ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -56,7 +57,7 @@
/* Find the largest receive amount */
for (i = 0, max_size = 0 ; i < size ; ++i) {
ompi_datatype_type_extent (rdtypes[i], &ext);
- ext *= rcounts[rank];
+ ext *= rcounts[i];
max_size = ext > max_size ? ext : max_size;
}
Index: ompi/mca/coll/tuned/coll_tuned_alltoallv.c
===================================================================
--- ompi/mca/coll/tuned/coll_tuned_alltoallv.c (revision 29185)
+++ ompi/mca/coll/tuned/coll_tuned_alltoallv.c (working copy)
@@ -13,6 +13,7 @@
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All Rights
* reserved.
+ * Copyright (c) 2013 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -71,7 +72,7 @@
/* Find the largest receive amount */
ompi_datatype_type_extent (rdtype, &ext);
for (i = 0, max_size = 0 ; i < size ; ++i) {
- size_t size = ext * rcounts[rank];
+ size_t size = ext * rcounts[i];
max_size = size > max_size ? size : max_size;
}
@@ -91,11 +92,11 @@
if (i == rank && rcounts[j]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
- tmp_buffer, (char *) rbuf + rdisps[j]);
+ tmp_buffer, (char *) rbuf + rdisps[j] * ext);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
- err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j], rcounts[j], rdtype,
+ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
@@ -106,11 +107,11 @@
} else if (j == rank && rcounts[i]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
- tmp_buffer, (char *) rbuf + rdisps[i]);
+ tmp_buffer, (char *) rbuf + rdisps[i] * ext);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
- err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i], rcounts[i], rdtype,
+ err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }