Hi again,

I have attached a very small example which raises the assertion.

The problem arises from a process which does not have any element to write to the file (and therefore none in its MPI_File_set_view)...

You can see this "bug" with openmpi 1.6.3, 1.6.4 and 1.7.0 configured with:

./configure --enable-mem-debug --enable-mem-profile --enable-memchecker
 --with-mpi-param-check --enable-debug

Just compile the given example (idx_null.cc) as-is with

mpicxx -o idx_null idx_null.cc

and run with 3 processes:

mpirun -n 3 idx_null

You can modify the example by commenting out "#define WITH_ZERO_ELEMNT_BUG" to see that everything works fine when all processes have something to write.

There is no "bug" if you use openmpi 1.6.3 (and higher) without the debugging options.

Also, all is working well with mpich-3.0.3 configured with:

./configure --enable-g=yes


So, is this a wrong "assert" in openmpi?

Is there any real problem with using this code in "release" mode?

Thanks,

Eric

On 04/05/2013 12:57 PM, Eric Chamberland wrote:
Hi all,

I have a well working (large) code which is using openmpi 1.6.3 (see
config.log here:
http://www.giref.ulaval.ca/~ericc/bug_openmpi/config.log_nodebug)

(I have used it for reading with MPI I/O with success over 1500 procs
with very large files)

However, when I use openmpi compiled with "debug" options:

./configure --enable-mem-debug --enable-mem-profile --enable-memchecker
--with-mpi-param-check --enable-debug --prefix=/opt/openmpi-1.6.3_debug
(see the other config.log here:
http://www.giref.ulaval.ca/~ericc/bug_openmpi/config.log_debug) the code
is aborting with an assertion on a very small example on 2 processors.
(the same very small example is working well without the debug mode)

Here is the assertion causing an abort:

===================================

openmpi-1.6.3/opal/datatype/opal_datatype.h:

/*
 * Quoted from openmpi-1.6.3/opal/datatype/opal_datatype.h.
 *
 * Returns 0 when the datatype's flags do not include
 * OPAL_DATATYPE_FLAG_CONTIGUOUS. Returns 1 when count is 1 or the flags
 * include OPAL_DATATYPE_FLAG_NO_GAPS. In every other case it asserts that
 * the datatype's size differs from (ub - lb) and then returns 0.
 */
static inline int32_t
opal_datatype_is_contiguous_memory_layout( const opal_datatype_t*
datatype, int32_t count )
{
     if( !(datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) return 0;
     if( (count == 1) || (datatype->flags & OPAL_DATATYPE_FLAG_NO_GAPS)
) return 1;


/* This is the assertion that aborts in the debug build. It is only
 * reached for a datatype flagged CONTIGUOUS, without NO_GAPS, and with
 * count != 1; it fails when size == ub - lb. */

     assert( (OPAL_PTRDIFF_TYPE)datatype->size != (datatype->ub -
datatype->lb) );

     return 0;
}

===================================

Can anyone tell me what this means?

It happens while writing a file with MPI I/O, when I call
"MPI_File_set_view" for the fourth time... with different types of
MPI_Datatype created with "MPI_Type_indexed".

I am trying to reproduce the bug with a very small example to send
here, but if anyone has a hint to give me...
(I would like: this assert is not good! just ignore it ;-) )

Thanks,

Eric
_______________________________________________
users mailing list
us...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/users

#include "mpi.h"
#include <cstdio>
#include <cstdlib>

using namespace std;

void abortOnError(int ierr) {
  if (ierr != MPI_SUCCESS) {
    printf("ERROR Returned by MPI: %d\n",ierr);
    char* lCharPtr = new char[MPI_MAX_ERROR_STRING];
    int lLongueur = 0;
    MPI_Error_string(ierr,lCharPtr, &lLongueur);
    printf("ERROR_string Returned by MPI: %s\n",lCharPtr);
    MPI_Abort( MPI_COMM_WORLD, 1 );
  }
}
// This main shows how an assertion is raised when MPI_File_set_view is
// called while some process holds no data

#define WITH_ZERO_ELEMNT_BUG

int main(int argc, char *argv[])
{
  int rank, size, i;
  MPI_Datatype lTypeIndexIntWithExtent, lTypeIndexIntWithoutExtent;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  if (size != 3)
  {
    printf("Please run with 3 processes.\n");
    MPI_Finalize();
    return 1;
  }
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  int displacement[3];
  int* buffer = 0;

  int lTailleBuf = 0;
  if (rank == 0)
  {
    lTailleBuf = 3;
    displacement[0] = 0;
    displacement[1] = 4;
    displacement[2] = 5;
    buffer = new int[lTailleBuf];
    for (i=0; i<lTailleBuf; i++) buffer[i] = 10*(i+1);
  }
  if (rank == 1)
  {
    lTailleBuf = 2;
    displacement[0] = 1;
    displacement[1] = 2;

#ifdef WITH_ZERO_ELEMNT_BUG
    displacement[2] = 3;
    ++lTailleBuf;
#endif

    buffer = new int[lTailleBuf];
    for (i=0; i<lTailleBuf; i++) buffer[i] = -(i+1);
  }
  // BUG: A rank without any "element"
  if (rank == 2)
  {
#ifdef WITH_ZERO_ELEMNT_BUG
    lTailleBuf = 0;
#else
    displacement[0] = 0;
    lTailleBuf = 1;
    buffer = new int[lTailleBuf];
    for (i=0; i<lTailleBuf; i++) buffer[i] = 1000*(i+1);
#endif
  }

  MPI_File lFile;

  abortOnError(MPI_File_open( MPI_COMM_WORLD, const_cast<char*>("temp"), 
MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &lFile ));

  MPI_Type_create_indexed_block(lTailleBuf, 1, displacement, MPI_INT, 
&lTypeIndexIntWithoutExtent);
  MPI_Type_commit(&lTypeIndexIntWithoutExtent);

  // Here we compute the total number of int to write to resize the type:
  // Ici, on veut s'échanger le nb total de int à écrire à chaque appel pcqu'on 
doit calculer le bon "extent" du type.
  // Ça revient à dire que chaque processus ne n'écrira qu'une petite partie du 
fichier, mais devra avancer son pointeur
  // local d'écriture suffisamment loin pour ne pas écrire par dessus les 
données des autres
  int lTailleGlobale = 0;
  printf("[%d] Local size : %d \n",rank,lTailleBuf);

  MPI_Allreduce( &lTailleBuf, &lTailleGlobale, 1, MPI_INT, MPI_SUM, 
MPI_COMM_WORLD );

  printf("[%d] MPI_AllReduce : %d \n",rank,lTailleGlobale);

  //We now modify the extent of the type "type_without_extent"
  MPI_Type_create_resized( lTypeIndexIntWithoutExtent, 0, 
lTailleGlobale*sizeof(int), &lTypeIndexIntWithExtent );
  MPI_Type_commit(&lTypeIndexIntWithExtent);

  abortOnError(MPI_File_set_view( lFile, 0, MPI_INT, lTypeIndexIntWithExtent, 
const_cast<char*>("native"), MPI_INFO_NULL));

  for (int i =0; i<2;++i) {
    abortOnError(MPI_File_write_all( lFile, buffer, lTailleBuf, MPI_INT, 
MPI_STATUS_IGNORE));
    MPI_Offset lOffset,lSharedOffset;
    MPI_File_get_position(lFile, &lOffset);
    MPI_File_get_position_shared(lFile, &lSharedOffset);
    printf("[%d] Offset after write : %d int: Local: %ld Shared: %ld 
\n",rank,lTailleBuf,lOffset,lSharedOffset);

  }

  abortOnError(MPI_File_close( &lFile ));

  abortOnError(MPI_Type_free(&lTypeIndexIntWithExtent));
  abortOnError(MPI_Type_free(&lTypeIndexIntWithoutExtent));

  MPI_Finalize();
  return 0;
}

Reply via email to