I am using MPI_FILE_WRITE_AT to print out the timings of subroutines in a big 
Fortran code. I have noticed since upgrading to Open MPI 2.1.1 that sometimes 
the file to be written is corrupted. Each MPI process is supposed to write out 
a character string that is 159 characters in length, plus a line feed. 
Sometimes, I see

^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@^@

instead of the character string. I cannot reproduce the problem consistently. 
That is, sometimes the file is fine, and sometimes the records are corrupted 
randomly. The subroutine is included below. I added some MPI_BARRIERs hoping 
that this would prevent the file from being closed too early, but that did not 
help.


SUBROUTINE DUMP_TIMERS

! Write the per-process subroutine timings to a CSV file using MPI-IO.
!
! Every MPI process writes one fixed-length record (LINE_LENGTH characters followed by a
! line feed) at record offset MYID+1. Process 0 additionally writes the column header at
! record offset 0. The file view uses the record type as both etype and filetype, so the
! offsets passed to MPI_FILE_WRITE_AT count whole records, not bytes.
!
! Uses from the enclosing scope: MYID, N_TIMERS, T_USED, SECOND(), I, FN_CPU and the MPI
! constants. Side effects: sets FN_CPU and overwrites T_USED(1).
!
! MPI file routines return error codes instead of aborting by default, so each return code
! is checked and reported on ERROR_UNIT. Control flow is deliberately NOT changed on error,
! so that every process still takes part in all collective calls.

USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY: ERROR_UNIT

INTEGER, PARAMETER :: LINE_LENGTH=159   ! 5 (rank) + 14*(1 comma + 10 digits) characters
CHARACTER, PARAMETER :: LF=ACHAR(10)
CHARACTER(LEN=LINE_LENGTH+1) :: LINE,HEAD
INTEGER :: ERROR,RECORD,FH

CALL MPI_BARRIER(MPI_COMM_WORLD,ERROR)

FN_CPU = 'file_cpu.csv'

! One record = one text line including its terminating line feed

CALL MPI_TYPE_CONTIGUOUS(LINE_LENGTH+1,MPI_CHARACTER,RECORD,ERROR)
CALL MPI_TYPE_COMMIT(RECORD,ERROR)

CALL MPI_FILE_OPEN(MPI_COMM_WORLD,FN_CPU,IOR(MPI_MODE_WRONLY,MPI_MODE_CREATE), &
                   MPI_INFO_NULL,FH,ERROR)
IF (ERROR/=MPI_SUCCESS) WRITE(ERROR_UNIT,'(A,I0,A,I0)') &
   'DUMP_TIMERS: MPI_FILE_OPEN failed on process ',MYID,', error code ',ERROR

CALL MPI_FILE_SET_VIEW(FH,0_MPI_OFFSET_KIND,RECORD,RECORD,'native', &
                       MPI_INFO_NULL,ERROR)
IF (ERROR/=MPI_SUCCESS) WRITE(ERROR_UNIT,'(A,I0,A,I0)') &
   'DUMP_TIMERS: MPI_FILE_SET_VIEW failed on process ',MYID,', error code ',ERROR

! T_USED(1) is the time spent in the main routine; i.e. the time not spent in
! some other routine

T_USED(1) = SECOND() - T_USED(1) - SUM(T_USED(2:N_TIMERS))
WRITE(LINE,'(I5,14(",",ES10.3))') &
   MYID,(T_USED(I),I=1,N_TIMERS),SUM(T_USED(1:N_TIMERS))
LINE(LINE_LENGTH+1:LINE_LENGTH+1) = LF

! Process 0 writes the header into record 0

IF (MYID==0) THEN
   HEAD(1:LINE_LENGTH+1) = ' '
   WRITE(HEAD,'(A)') &
      'Rank,MAIN,DIVG,MASS,VELO,PRES,WALL,DUMP,PART,RADI,FIRE,COMM,EVAC,HVAC,Total T_USED (s)'
   HEAD(LINE_LENGTH+1:LINE_LENGTH+1) = LF
   CALL MPI_FILE_WRITE_AT(FH,INT(0,MPI_OFFSET_KIND),HEAD,1,RECORD, &
                          MPI_STATUS_IGNORE,ERROR)
   IF (ERROR/=MPI_SUCCESS) WRITE(ERROR_UNIT,'(A,I0)') &
      'DUMP_TIMERS: writing the header record failed, error code ',ERROR
ENDIF

! Every process writes its own timings into record MYID+1

CALL MPI_FILE_WRITE_AT(FH,INT(MYID+1,MPI_OFFSET_KIND),LINE,1,RECORD, &
                       MPI_STATUS_IGNORE,ERROR)
IF (ERROR/=MPI_SUCCESS) WRITE(ERROR_UNIT,'(A,I0,A,I0)') &
   'DUMP_TIMERS: MPI_FILE_WRITE_AT failed on process ',MYID,', error code ',ERROR

CALL MPI_BARRIER(MPI_COMM_WORLD,ERROR)

CALL MPI_FILE_CLOSE(FH,ERROR)
IF (ERROR/=MPI_SUCCESS) WRITE(ERROR_UNIT,'(A,I0,A,I0)') &
   'DUMP_TIMERS: MPI_FILE_CLOSE failed on process ',MYID,', error code ',ERROR
CALL MPI_TYPE_FREE(RECORD,ERROR)

END SUBROUTINE DUMP_TIMERS
_______________________________________________
users mailing list
users@lists.open-mpi.org
https://lists.open-mpi.org/mailman/listinfo/users

Reply via email to