Hi all,
I'm seeing some problems with derived datatype construction and I/O
with OpenMPI 1.8.1.
I have replicated them in the attached program.
The first issue is that MPI_Type_create_hindexed_block() always
segfaults. Usage of this routine is commented out in the program. (I
have a separate email thread with George and Edgar about this).
The other issue is a segfault in MPI_File_set_view, when I have ranks >
0 creating the derived datatypes with count 0, and rank 0 creating a
derived datatype of count NUM_BLOCKS. If I use MPI_Type_contiguous to
create the 0 sized file and memory datatypes (instead of hindexed and
hvector) it works fine.
To replicate, run the program with 2 or more procs:
mpirun -np 2 ./hindexed_io mpi_test_file
[jam:15566] *** Process received signal ***
[jam:15566] Signal: Segmentation fault (11)
[jam:15566] Signal code: Address not mapped (1)
[jam:15566] Failing at address: (nil)
[jam:15566] [ 0] [0xfcd440]
[jam:15566] [ 1]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(ADIOI_Flatten_datatype+0x17a)[0xc80f2a]
[jam:15566] [ 2]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(ADIO_Set_view+0x1c1)[0xc72a6d]
[jam:15566] [ 3]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(mca_io_romio_dist_MPI_File_set_view+0x69b)[0xc8d11b]
[jam:15566] [ 4]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(mca_io_romio_file_set_view+0x7c)[0xc4f7c5]
[jam:15566] [ 5]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(PMPI_File_set_view+0x1e6)[0xb32f7e]
[jam:15566] [ 6] ./hindexed_io[0x8048aa6]
[jam:15566] [ 7] /lib/libc.so.6(__libc_start_main+0xdc)[0x7d5ebc]
[jam:15566] [ 8] ./hindexed_io[0x80487e1]
[jam:15566] *** End of error message ***
If I use --mca io ompio with 2 or more procs, the program segfaults in
write_at_all (regardless of what routine is used to construct a 0 sized
datatype):
[jam:15687] *** Process received signal ***
[jam:15687] Signal: Floating point exception (8)
[jam:15687] Signal code: Integer divide-by-zero (1)
[jam:15687] Failing at address: 0x3e29b7
[jam:15687] [ 0] [0xe56440]
[jam:15687] [ 1]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(ompi_io_ompio_set_explicit_offset+0x9d)[0x3513bc]
[jam:15687] [ 2]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(ompio_io_ompio_file_write_at_all+0x3e)[0x35869a]
[jam:15687] [ 3]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(mca_io_ompio_file_write_at_all+0x66)[0x358650]
[jam:15687] [ 4]
/scr/chaarawi/install/ompi/lib/libmpi.so.1(MPI_File_write_at_all+0x1b3)[0x1f46f3]
[jam:15687] [ 5] ./hindexed_io[0x8048b07]
[jam:15687] [ 6] /lib/libc.so.6(__libc_start_main+0xdc)[0x7d5ebc]
[jam:15687] [ 7] ./hindexed_io[0x80487e1]
[jam:15687] *** End of error message ***
If I use MPICH 3.1.2, I don't see those issues.
Thanks,
Mohamad
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#define NUM_BLOCKS 1
#define BLOCK_SIZE 1000
/*
 * Reproducer for collective MPI-IO with derived datatypes where only rank 0
 * describes data and every other rank builds its datatypes with count 0.
 *
 * Usage: mpirun -np <N> ./hindexed_io <output-file>
 *
 * Steps:
 *   1. Rank 0 builds an hindexed file type and an hvector memory type with
 *      NUM_BLOCKS blocks of BLOCK_SIZE bytes; all other ranks build the same
 *      kinds of types with a count of 0.
 *   2. All ranks collectively open the file, set a file view at byte
 *      displacement 2144 using their file type, and call
 *      MPI_File_write_at_all with one element of their memory type.
 *
 * Returns 0 on success. On a usage error every rank finalizes MPI and
 * returns 1; on any other failure the whole job is aborted with MPI_Abort so
 * that ranks blocked in a collective call do not hang.
 */
int main(int argc, char** argv)
{
    MPI_File fh;
    MPI_Datatype file_type, mem_type;
    void *data = NULL;
    int rc, i, rank;
    MPI_Aint disp[NUM_BLOCKS];
    int block_lens[NUM_BLOCKS];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* The output file name is mandatory; argv[1] would be NULL without it. */
    if (argc < 2) {
        if (0 == rank) {
            fprintf(stderr, "Usage: %s <output-file>\n", argv[0]);
        }
        MPI_Finalize();
        return 1;
    }

    /* Byte displacement and length of each block in the file layout. */
    for (i = 0; i < NUM_BLOCKS; i++) {
        disp[i] = (MPI_Aint)((BLOCK_SIZE + 1555) * i);
        block_lens[i] = BLOCK_SIZE;
    }

    /* Zero-filled source buffer handed to the collective write. */
    data = malloc(BLOCK_SIZE);
    if (NULL == data) {
        fprintf(stderr, "Can't allocate %d bytes\n", BLOCK_SIZE);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }
    memset(data, 0, BLOCK_SIZE);

    if (0 == rank) {
        /* Rank 0 is the only rank whose datatypes describe any bytes. */
        //MPI_Type_create_hindexed_block(NUM_BLOCKS, BLOCK_SIZE, disp, MPI_BYTE, &file_type);
        MPI_Type_create_hindexed(NUM_BLOCKS, block_lens, disp, MPI_BYTE, &file_type);
        MPI_Type_create_hvector(NUM_BLOCKS, BLOCK_SIZE, 0, MPI_BYTE, &mem_type);
    }
    else {
        /* Every other rank participates with count-0 datatypes. */
        //MPI_Type_create_hindexed_block(0, BLOCK_SIZE, disp, MPI_BYTE, &file_type);
        MPI_Type_create_hindexed(0, block_lens, disp, MPI_BYTE, &file_type);
        MPI_Type_create_hvector(0, BLOCK_SIZE, 0, MPI_BYTE, &mem_type);
    }
    MPI_Type_commit(&file_type);
    MPI_Type_commit(&mem_type);

    if (MPI_SUCCESS != MPI_File_open(MPI_COMM_WORLD, argv[1],
                                     MPI_MODE_RDWR | MPI_MODE_CREATE,
                                     MPI_INFO_NULL, &fh)) {
        printf("Can't open file: %s\n",argv[1]);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    printf("SET VIEW\n");
    if (MPI_SUCCESS != MPI_File_set_view(fh, 2144, MPI_BYTE,
                                         file_type, "native", MPI_INFO_NULL)) {
        printf ("ERROR SET VIEW\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    printf("WRITING\n");
    /* write everything */
    rc = MPI_File_write_at_all(fh,
                               0,
                               data,
                               1,
                               mem_type,
                               MPI_STATUS_IGNORE);
    if (rc != MPI_SUCCESS) {
        printf ("%d ERROR WRITE AT ALL\n", rc);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    MPI_File_close(&fh);
    MPI_Type_free(&mem_type);
    MPI_Type_free(&file_type);
    MPI_Finalize();
    free(data);
    return 0;
}