[OMPI devel] Some questions about checkpoint/restart (9)
The 9th question is as follows:

(9) A communication whose element count/size differs between sender and receiver deadlocks after taking a checkpoint.

Framework         : crcp
Component         : bkmrk
The source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_find

Here is the code that causes the problem:

#define WORKBUFSIZE 4
#define SLPTIME 60

  int rbuf[WORKBUFSIZE];
  int j;

  MPI_Barrier(MPI_COMM_WORLD);
  if (rank == 1) {
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);                    /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
    MPI_Wait(&req,&sts);
    j=rbuf[0];
  } else {                             /* rank 0 */
    j=100;
    MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);                    /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Wait(&req,&sts);
  }
  printf(" rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout);

* Take a checkpoint while process 0 and process 1 are in the sleep() calls; the MPI program then deadlocks.

* The element count/size comparison fails in drain_message_find:

  drain_message_find:My=1 drain_msg=e6fc80 [peer=0/0 count=4/1 comm=6014e0
    ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/4 [datatype->size=1]]
    [done=1 active=0 already_posted=0]

  /* Check the datatype size, if specified for a match */
  if( ddt_size != PROBE_ANY_SIZE && count != PROBE_ANY_COUNT) {
      /* Check the datatype size and count to make sure it matches */
      if((drain_msg->count   ) != count    ||
         (drain_msg->ddt_size) != ddt_size) {
          continue;
      }
  }

  drain_msg->count is 1, count is 4.
  drain_msg->ddt_size is 4, ddt_size is 4.

  (A relaxed, byte-based variant of this check is sketched below, after the
  three variant test cases.)

* If Open MPI is built with the --enable-debug configure option and the openib BTL is selected when running the MPI job, the following assertion fails in mca_btl_openib_ft_event:

  t_mpi_question-9.out: ../../../../../ompi/mca/btl/openib/btl_openib.c:1433:
  mca_btl_openib_ft_event: Assertion `((0xdeafbeedULL << 32) + 0xdeafbeedULL)
  == ((opal_object_t *) (&mca_btl_openib_component.ib_procs))->obj_magic_id' failed.

* The following programs behave in the same way:

  1) t_mpi_question-9-packunpack.c

     Sender  : MPI_Isend(&workbuf[0],j,MPI_PACKED,1,1000,MPI_COMM_WORLD,&req);
     Receiver:
       #define WORKBUFSIZ 64
       char workbuf[WORKBUFSIZ];
       MPI_Irecv(&workbuf[0],WORKBUFSIZ,MPI_PACKED,0,1000,MPI_COMM_WORLD,&req);

     drain_message_find:My=1 drain_msg=794200 [peer=0/0 count=64/20 comm=601ba0
       ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=1/1 [datatype->size=1]]
       [done=1 active=0 already_posted=0]

     drain_msg->count is 20, count is 64.

  2) t_mpi_question-9-contiguous.c

     Sender  : cc=MPI_Type_contiguous(50,MPI_INT,&newdt);
               cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
     Receiver: cc=MPI_Irecv(&buf[0][0],50,MPI_INT,0,1000,MPI_COMM_WORLD,&req);

     drain_message_find:My=1 drain_msg=1658200 [peer=0/0 count=50/1 comm=601840
       ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/200 [datatype->size=1]]
       [done=1 active=0 already_posted=0]

     drain_msg->count is 1, count is 50.
     drain_msg->ddt_size is 200, ddt_size is 4.

  3) t_mpi_question-9-vector.c

     Sender  : cc=MPI_Type_vector(10,1,10,MPI_INT,&newdt);
               cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
     Receiver: cc=MPI_Irecv(&buf[0][0],10,MPI_INT,0,1000,MPI_COMM_WORLD,&req);

     drain_message_find:My=1 drain_msg=20ad900 [peer=0/0 count=10/1 comm=601840
       ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/40 [datatype->size=1]]
       [done=1 active=0 already_posted=0]

     drain_msg->count is 1, count is 10.
     drain_msg->ddt_size is 40, ddt_size is 4.
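If the intent of the quoted check is only to make sure the drained message fits the posted receive, then comparing total byte counts, rather than requiring count and ddt_size to match exactly on both sides, might be enough; MPI matching itself only requires that the incoming message not overflow the receive buffer. The following is a minimal standalone sketch of such a relaxed check, not the actual bkmrk code: the helper name and parameters are hypothetical, and the byte totals are assumed to come from the values already stored in the drain message and the posted request (drain_msg->count * drain_msg->ddt_size and count * ddt_size).

#include <stddef.h>

/* Hypothetical helper (not part of the bkmrk component): a relaxed size
 * check that accepts a drained message as long as the data that was
 * actually sent fits into the posted receive buffer, instead of requiring
 * count and ddt_size to match exactly on both sides. */
static int drain_msg_fits_posted_recv(size_t drain_count, size_t drain_ddt_size,
                                      size_t posted_count, size_t posted_ddt_size)
{
    size_t drained_bytes = drain_count  * drain_ddt_size;   /* bytes recorded for the drained message */
    size_t posted_bytes  = posted_count * posted_ddt_size;  /* capacity of the posted receive */

    /* Accept the match as long as the message does not overflow the buffer. */
    return drained_bytes <= posted_bytes;
}

Under this kind of check, all of the cases reported above would match: 1 int into 4 ints (4 <= 16 bytes), 20 packed bytes into 64, one contiguous(50,MPI_INT) element into 50 ints (200 <= 200), and one vector element into 10 ints (40 <= 40).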
-bash-3.2$ cat t_mpi_question-9.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "mpi.h"

#define WORKBUFSIZE 4
#define SLPTIME 60

int main(int ac,char **av)
{
    int rank,size,cc,i,j;
    MPI_Request req;
    MPI_Status sts;
    int rbuf[WORKBUFSIZE];

    rank=0;
    j=0;
    memset((void *)rbuf,0,sizeof(int)*WORKBUFSIZE);
    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 1) {
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
        MPI_Wait(&req,&sts);
        j=rbuf[0];
    } else {
        j=100;
        MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Wait(&req,&sts);
    }
    printf(" rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout);
    MPI_Finalize();
    return(0);
}
[OMPI devel] Some questions about checkpoint/restart (10)
(10) A receive with element count 0 terminates abnormally after taking a checkpoint.

Framework         : crcp
Component         : bkmrk
The source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_copy_remove

Here is the code that causes the problem:

  if (rank == 0) {
    j=100;
    MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);                    /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
  } else {
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);                    /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
  }
  MPI_Wait(&req,&sts);

* Take a checkpoint while process 0 and process 1 are in the sleep() calls; the program then terminates abnormally with the following message:

  *** An error occurred in MPI_Irecv
  *** on communicator MPI_COMM_WORLD
  *** MPI_ERR_BUFFER: invalid buffer pointer
  *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)

* ompi_ddt_copy_content_same_ddt returns true when called from drain_message_copy_remove, and an error occurs.

* When count is 0, it returns true; the relevant code is:

  /* empty data ? then do nothing. This should normally be trapped
   * at a higher level.
   */
  if( 0 == count ) return 1;

* If count is 0, is it necessary for drain_message_copy_remove to call the copy function (ompi_ddt_copy_content_same_ddt) at all? (A sketch of such a guard follows the program listing below.)

-bash-3.2$ cat t_mpi_question-10.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60

int main(int ac,char **av)
{
    int rank,size,cc,i,j;
    MPI_Request req;
    MPI_Status sts;

    rank=0;
    j=0;
    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) {
        j=100;
        MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
    } else {
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
    }
    MPI_Wait(&req,&sts);
    printf(" rank=%d pass-2 %d \n",rank,j); fflush(stdout);
    if ((rank == 1) && (j != 0)) {
        MPI_Abort(MPI_COMM_WORLD,1);
    }
    MPI_Finalize();
    if (rank == 0) {
        printf(" rank=%d program end \n",rank); fflush(stdout);
    }
    return(0);
}
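To make the suggestion in the last question concrete, here is a minimal sketch, not the actual bkmrk code, of guarding the copy with a zero-count check so the datatype copy routine is never entered for an empty message. The function name is hypothetical, and memcpy is only a stand-in for ompi_ddt_copy_content_same_ddt.

#include <stddef.h>
#include <string.h>

/* Hypothetical helper (illustration only): copy drained data into the
 * user's receive buffer only when there is actually something to copy,
 * so that the real datatype copy routine is never called with count == 0. */
static int copy_drained_data(void *dst, const void *src,
                             size_t count, size_t elem_size)
{
    if (0 == count) {
        /* Empty message: nothing to copy, and this is not an error. */
        return 0;
    }
    /* memcpy stands in here for ompi_ddt_copy_content_same_ddt(). */
    memcpy(dst, src, count * elem_size);
    return 0;
}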
[OMPI devel] Some questions about checkpoint/restart (11)
The 11th question is as follows:

(11) A communication that uses an inter-communicator deadlocks after taking a checkpoint.

Framework         : crcp
Component         : bkmrk
The source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_find_any

Here is the code that causes the problem:

#define SLPTIME 60

  buf = -1;
  if (rank == 0) {
    buf = 9014;
    MPI_Isend(&buf,1,MPI_INT,0,1000,intercomm,&req);   /* using inter-communicator */
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);                    /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    MPI_Wait(&req,&sts);
  } else if (rank==1) {
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);                    /** take checkpoint at this point **/
    printf(" rank=%d sleep end \n",rank); fflush(stdout);
    buf = 0;
    MPI_Irecv(&buf,1,MPI_INT,0,1000,intercomm,&req);   /* using inter-communicator */
    MPI_Wait(&req,&sts);
  }

* Take a checkpoint while process 0 and process 1 are in the sleep() calls; the MPI program then deadlocks.

* Here is my debugging output:

  ft_event_post_drain_message:Irecv drain_msg_ref=8a2f80 rank=0 tag=1000
    cnt=1 ddt=4 to=8c27c0 [datatype->size=1]
  wait_quiesce_drained:xx=0 9014
  drain_message_find_any:Compare[peer=0] vpid=0 1 jobid=-431423487 -431423487
    grp_proc_count=1 89cea0 1
  drain_message_find_any:Compare[peer=0] -> Continue

* Because the vpid/jobid comparison by orte_util_compare_name_fields fails, drain_message_find_any does not call drain_message_find, and the message already received (drained) by bkmrk is never found. Does orte_util_compare_name_fields handle inter-communicators correctly? (A small rank-translation illustration follows the program listing below.)

-bash-3.2$ cat t_mpi_question-11.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60

int main(int ac,char **av)
{
    int rank,size,cc,j,i,buf;
    MPI_Request req;
    MPI_Status sts;
    MPI_Comm localcomm,intercomm;
    MPI_Group worldgrp,localgrp;
    int local_grp_size,localrank,localsize,interrank,intersize;
    int *rank_list;
    int local_leader,remote_leader;

    rank=0;
    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    if (size%2 != 0) {
        MPI_Abort(MPI_COMM_WORLD,-1);
    }
    printf(" rank=%d pass-1 \n",rank); fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Comm_group(MPI_COMM_WORLD,&worldgrp);
    local_grp_size = size / 2;
    rank_list = (int *)malloc(sizeof(int) * local_grp_size);
    if (rank_list == NULL) {
        MPI_Abort(MPI_COMM_WORLD,-1);
    }
    j = ((rank % 2) == 0) ? 0 : 1;
    for (i=0;i
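To illustrate why the name comparison can fail here: on an inter-communicator, the source/destination rank of a send or receive names a process in the remote group, so "rank 0" of intercomm is a different process from rank 0 of the local group. The snippet below is an illustration only, using standard MPI calls and assuming an already created inter-communicator like the one in t_mpi_question-11.c; it translates remote rank 0 into a rank of MPI_COMM_WORLD, which is the process whose vpid/jobid the drained-message bookkeeping would actually have to match.

#include <stdio.h>
#include "mpi.h"

/* Illustration only: translate rank 0 of the REMOTE group of an
 * inter-communicator into a rank of MPI_COMM_WORLD.  On an
 * inter-communicator, send/receive ranks always name processes in the
 * remote group, not the local one. */
static void show_remote_peer(MPI_Comm intercomm)
{
    MPI_Group remote_grp, world_grp;
    int remote_rank = 0, world_rank;

    MPI_Comm_remote_group(intercomm, &remote_grp);
    MPI_Comm_group(MPI_COMM_WORLD, &world_grp);
    MPI_Group_translate_ranks(remote_grp, 1, &remote_rank,
                              world_grp, &world_rank);
    printf("intercomm remote rank 0 = MPI_COMM_WORLD rank %d\n", world_rank);

    MPI_Group_free(&remote_grp);
    MPI_Group_free(&world_grp);
}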
Re: [OMPI devel] Some questions about checkpoint/restart (9)
> 1) t_mpi_question-9-packunpack.c

I did not include this program in the 9th mail. The program is as follows:

-bash-3.2$ cat t_mpi_question-9-packunpack.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60
#define WORKBUFSIZ 64

struct dd {
    int x;
    int a;
    int y;
    int b;
    int c;
};

int main(int ac,char **av)
{
    int rank,size,cc,j,i;
    MPI_Request req;
    MPI_Status sts;
    struct dd buf,ans_dd_buf;
    char workbuf[WORKBUFSIZ];

    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);
    if (rank == 0) {
        buf.x = 1; buf.a = 4329; buf.y = 2; buf.b = 8474; buf.c = 48;
    } else {
        buf.x = 0; buf.a = 0; buf.y = 0; buf.b = 0; buf.c = 0;
    }
    ans_dd_buf.x = 1;
    ans_dd_buf.a = 4329;
    ans_dd_buf.y = 2;
    ans_dd_buf.b = 8474;
    ans_dd_buf.c = 48;
    j=0;          /* position */
    memset((void *)&workbuf[0],0,WORKBUFSIZ);
    if (rank == 0) {
        cc=MPI_Pack(&buf.x,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Pack(&buf.a,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Pack(&buf.y,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Pack(&buf.b,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Pack(&buf.c,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Isend(&workbuf[0],j,MPI_PACKED,1,1000,MPI_COMM_WORLD,&req);
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Wait(&req,&sts);
    } else {
        MPI_Barrier(MPI_COMM_WORLD);
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Irecv(&workbuf[0],WORKBUFSIZ,MPI_PACKED,0,1000,MPI_COMM_WORLD,&req);
        MPI_Wait(&req,&sts);
        cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.x,1,MPI_INT,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.a,1,MPI_INT,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.y,1,MPI_INT,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.b,1,MPI_INT,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
        cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.c,1,MPI_INT,MPI_COMM_WORLD);
        if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    }
    printf(" rank=%d pass-3 %d %d %d %d %d \n",
           rank,buf.x,buf.a,buf.y,buf.b,buf.c);
    fflush(stdout);
    if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
    MPI_Finalize();
    if (rank == 0) {
        printf(" rank=%d program end \n",rank); fflush(stdout);
    }
    return(0);
}
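As a side note on the count=64/20 mismatch reported for this test: receiving fewer packed bytes than the posted count is normal MPI usage, and the number of MPI_PACKED bytes that actually arrived can be queried with MPI_Get_count on the status returned by MPI_Wait; that value corresponds to the drain_msg->count=20 (5 packed ints) in the debugging output of the 9th mail. The fragment below is only an illustration that reuses the names (req, sts, rank) from the program above and could be placed right after the receiver's MPI_Wait.

    /* Illustration only: query how many MPI_PACKED bytes actually arrived.
     * For the program above this should report 20 bytes (5 packed ints),
     * even though the receive was posted with WORKBUFSIZ (64) bytes. */
    int received_bytes;
    MPI_Get_count(&sts, MPI_PACKED, &received_bytes);
    printf(" rank=%d received %d packed bytes \n", rank, received_bytes);
    fflush(stdout);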