[OMPI devel] Some questions about checkpoint/restart (9)

2010-04-02 Thread Takayuki Seki

The 9th question is as follows:

(9) Communication in which the sender and receiver specify different element
counts deadlocks after taking a checkpoint.

Framework : crcp
Component : bkmrk
The source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_find

Here's the code that causes the problem:

#define WORKBUFSIZE 4
#define SLPTIME 60

  int rbuf[WORKBUFSIZE];
  int j;

  MPI_Barrier(MPI_COMM_WORLD);
  if (rank == 1) {
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);  /** take checkpoint at this point **/
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
    MPI_Wait(&req,&sts);
    j=rbuf[0];
  }
  else {  /* rank 0 */
    j=100;
    MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);  /** take checkpoint at this point **/
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Wait(&req,&sts);
  }
  printf("   rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout);


* Take a checkpoint while process 0 and process 1 are in the sleep function;
  the MPI program then deadlocks.
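
  Note that without the checkpoint this exchange is legal MPI: the count on a
  receive is only an upper bound, and the number of elements actually
  delivered is available from the status.  A minimal sketch, reusing req and
  sts from the test program above:

    int nrecv = 0;
    MPI_Wait(&req,&sts);
    /* The receive was posted with count WORKBUFSIZE (4), but only one
     * MPI_INT arrives; MPI_Get_count reports the actual element count. */
    MPI_Get_count(&sts,MPI_INT,&nrecv);  /* nrecv == 1 here */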

* The element count does not match in drain_message_find. Here is the debug output:
 drain_message_find:My=1 drain_msg=e6fc80 [peer=0/0 count=4/1 comm=6014e0 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/4 [datatype->size=1]] [done=1 active=0 already_posted=0]

    /* Check the datatype size, if specified for a match */
    if( ddt_size != PROBE_ANY_SIZE &&
        count    != PROBE_ANY_COUNT ) {
        /* Check the datatype size and count to make sure it matches */
        if( (drain_msg->count)    != count   ||
            (drain_msg->ddt_size) != ddt_size ) {
            continue;
        }
    }

  drain_msg->count    is 1.
  count               is 4.
  drain_msg->ddt_size is 4.
  ddt_size            is 4.
  (The drained message is smaller than the posted receive, which MPI
  permits, but the exact count comparison rejects it.)
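
  Since MPI only requires that the incoming message be no larger than the
  posted receive, a "fits in the posted buffer" test would accept this case
  and the three cases below.  A minimal sketch with the field names from the
  debug output (an untested suggestion, not the actual fix):

    /* Hypothetical relaxation of the exact match: accept the drained
     * message if its payload fits in the posted receive buffer. */
    size_t drained_bytes = (size_t)drain_msg->count * drain_msg->ddt_size;
    size_t posted_bytes  = (size_t)count * ddt_size;
    if( drained_bytes > posted_bytes ) {
        continue;  /* message too large for this posted receive */
    }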

* If Open MPI is built with the --enable-debug configure option and the
  openib BTL is selected when running the MPI job, the following assertion
  fails in mca_btl_openib_ft_event (the obj_magic_id check suggests that
  mca_btl_openib_component.ib_procs was already destructed or overwritten):

  t_mpi_question-9.out: ../../../../../ompi/mca/btl/openib/btl_openib.c:1433:
  mca_btl_openib_ft_event: Assertion `((0xdeafbeedULL << 32) + 0xdeafbeedULL)
  == ((opal_object_t *)(&mca_btl_openib_component.ib_procs))->obj_magic_id' failed.

* The following programs behave in the same way:

  1) t_mpi_question-9-packunpack.c

Sender  : MPI_Isend(&workbuf[0],j,MPI_PACKED,1,1000,MPI_COMM_WORLD,&req);
Receiver: #define WORKBUFSIZ 64
          char workbuf[WORKBUFSIZ];
          MPI_Irecv(&workbuf[0],WORKBUFSIZ,MPI_PACKED,0,1000,MPI_COMM_WORLD,&req);

drain_message_find:My=1 drain_msg=794200 [peer=0/0 count=64/20 comm=601ba0 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=1/1 [datatype->size=1]] [done=1 active=0 already_posted=0]

drain_msg->count    is 20.
count               is 64.
(The drained 20 bytes fit in the 64-byte posted buffer.)

  2) t_mpi_question-9-contiguous.c

Sender  : cc=MPI_Type_contiguous(50,MPI_INT,&newdt);
          cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
Receiver: cc=MPI_Irecv(&buf[0][0],50,MPI_INT,0,1000,MPI_COMM_WORLD,&req);

drain_message_find:My=1 drain_msg=1658200 [peer=0/0 count=50/1 comm=601840 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/200 [datatype->size=1]] [done=1 active=0 already_posted=0]

drain_msg->count    is 1.
count               is 50.
drain_msg->ddt_size is 200.
ddt_size            is 4.
(The total payload matches: 1 x 200 = 50 x 4 = 200 bytes, but the exact
per-field comparison still rejects the message.)

  3) t_mpi_question-9-vector.c

Sender  : cc=MPI_Type_vector(10,1,10,MPI_INT,&newdt);
          cc=MPI_Isend(&buf[0][0],1,newdt,1,1000,MPI_COMM_WORLD,&req);
Receiver: cc=MPI_Irecv(&buf[0][0],10,MPI_INT,0,1000,MPI_COMM_WORLD,&req);

drain_message_find:My=1 drain_msg=20ad900 [peer=0/0 count=10/1 comm=601840 ID 0/0/0 R=1/1 tag=1000/1000 ddt_size=4/40 [datatype->size=1]] [done=1 active=0 already_posted=0]

drain_msg->count    is 1.
count               is 10.
drain_msg->ddt_size is 40.
ddt_size            is 4.
(Again the totals match: 1 x 40 = 10 x 4 = 40 bytes.)


-bash-3.2$ cat t_mpi_question-9.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "mpi.h"

#define WORKBUFSIZE 4
#define SLPTIME 60

int main(int ac,char **av)
{
  int rank,size,cc,i,j;
  MPI_Request req;
  MPI_Status sts;
  int rbuf[WORKBUFSIZE];

  rank=0;
  j=0;
  memset((void *)rbuf,0,sizeof(int)*WORKBUFSIZE);

  MPI_Init(&ac,&av);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);

  MPI_Barrier(MPI_COMM_WORLD);

  if (rank == 1) {
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Irecv(&rbuf[0],WORKBUFSIZE,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
    MPI_Wait(&req,&sts);
    j=rbuf[0];
  }
  else {
    j=100;
    MPI_Isend(&j,1,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Wait(&req,&sts);
  }
  printf("   rank=%d pass-2 %d %d \n",rank,j,sts._count); fflush(stdout);

  MPI_Finalize();
  if (rank ==0) {
    printf("   rank=%d program end \n",rank); fflush(stdout);
  }
  return(0);
}

[OMPI devel] Some questions about checkpoint/restart (10)

2010-04-02 Thread Takayuki Seki

(10) A receive with element count 0 terminates abnormally after taking a
checkpoint.

Framework : crcp
Component : bkmrk
The source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_copy_remove

  if (rank == 0) {
    j=100;
    MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);  /** take checkpoint at this point **/
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
  }
  else {
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);  /** take checkpoint at this point **/
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
  }
  MPI_Wait(&req,&sts);

* Take a checkpoint while process 0 and process 1 are in the sleep function;
  the program then terminates abnormally with the following message:

  *** An error occurred in MPI_Irecv
  *** on communicator MPI_COMM_WORLD
  *** MPI_ERR_BUFFER: invalid buffer pointer
  *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)

* In drain_message_copy_remove, ompi_ddt_copy_content_same_ddt returns 1
  (true), and that nonzero return value is treated as an error.

* When count is 0, ompi_ddt_copy_content_same_ddt returns 1 immediately:

    /* empty data ? then do nothing. This should normally be trapped
     * at a higher level.
     */
    if( 0 == count ) return 1;

* If count is 0, is it necessary for drain_message_copy_remove to call the
  copy function (ompi_ddt_copy_content_same_ddt) at all?
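
* A minimal sketch of the guard I have in mind, with guessed variable and
  field names (not the actual Open MPI code):

    /* Hypothetical guard in drain_message_copy_remove: skip the copy for
     * empty messages instead of treating the copy function's early
     * return of 1 as an error. */
    if( 0 < drain_msg->count ) {
        ret = ompi_ddt_copy_content_same_ddt(datatype, drain_msg->count,
                                             dest_buffer, drain_msg->buffer);
        /* handle real copy errors here */
    }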


-bash-3.2$ cat t_mpi_question-10.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60

int main(int ac,char **av)
{
  int rank,size,cc,i,j;
  MPI_Request req;
  MPI_Status sts;

  rank=0;
  j=0;
  MPI_Init(&ac,&av);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);

  MPI_Barrier(MPI_COMM_WORLD);

  if (rank == 0) {
    j=100;
    MPI_Isend(&j,0,MPI_INT,1,1000,MPI_COMM_WORLD,&req);
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
  }
  else {
    printf("   rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf("   rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Irecv(&j,0,MPI_INT,0,1000,MPI_COMM_WORLD,&req);
  }
  MPI_Wait(&req,&sts);
  printf("   rank=%d pass-2 %d \n",rank,j); fflush(stdout);
  if ((rank == 1) && (j != 0)) { MPI_Abort(MPI_COMM_WORLD,1); }

  MPI_Finalize();
  if (rank ==0) {
printf("   rank=%d program end \n",rank); fflush(stdout);
  }
  return(0);
}



[OMPI devel] Some questions about checkpoint/restart (11)

2010-04-02 Thread Takayuki Seki

The 11th question is as follows:

(11) Communication that uses an inter-communicator deadlocks after taking a
checkpoint.

Framework : crcp
Component : bkmrk
The source file   : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : drain_message_find_any

Here's the code that causes the problem:

#define SLPTIME 60

  buf = -1;
  if (rank == 0) {
    buf = 9014;
    MPI_Isend(&buf,1,MPI_INT,0,1000,intercomm,&req);  /* using inter-communicator */

    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);  /** take checkpoint at this point **/
    printf(" rank=%d sleep end   \n",rank); fflush(stdout);

    MPI_Wait(&req,&sts);
  }
  else if (rank==1) {
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);  /** take checkpoint at this point **/
    printf(" rank=%d sleep end   \n",rank); fflush(stdout);

    buf = 0;
    MPI_Irecv(&buf,1,MPI_INT,0,1000,intercomm,&req);  /* using inter-communicator */
    MPI_Wait(&req,&sts);
  }

* Take a checkpoint while process 0 and process 1 are in the sleep function;
  the MPI program then deadlocks.

* Here's my debugging output:

  ft_event_post_drain_message:Irecv drain_msg_ref=8a2f80 rank=0 tag=1000 cnt=1 ddt=4 to=8c27c0 [datatype->size=1]
  wait_quiesce_drained:xx=0 9014
  drain_message_find_any:Compare[peer=0] vpid=0 1 jobid=-431423487 -431423487 grp_proc_count=1 89cea0 1
  drain_message_find_any:Compare[peer=0] -> Continue

* Because the vpid/jobid comparison in orte_util_compare_name_fields fails,
  drain_message_find_any does not call drain_message_find, so the messages
  that bkmrk drained and stored are never found.
  Does orte_util_compare_name_fields handle the process names that come from
  an inter-communicator?
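
* One observation that may help: on an inter-communicator, the peer rank
  names a process in the remote group, not in MPI_COMM_WORLD, so the stored
  process name and the name being compared can come from different rank
  spaces.  A small sketch using only standard MPI calls (the helper name is
  mine) that shows the translation:

    /* Hypothetical helper: map rank 0 of the inter-communicator's
     * remote group back to its rank in MPI_COMM_WORLD. */
    static void show_remote_rank0(MPI_Comm intercomm)
    {
      MPI_Group world_grp,remote_grp;
      int remote0 = 0, world_rank = MPI_UNDEFINED;

      MPI_Comm_group(MPI_COMM_WORLD,&world_grp);
      MPI_Comm_remote_group(intercomm,&remote_grp);
      MPI_Group_translate_ranks(remote_grp,1,&remote0,world_grp,&world_rank);
      printf("remote rank 0 = world rank %d\n",world_rank);
      MPI_Group_free(&remote_grp);
      MPI_Group_free(&world_grp);
    }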


-bash-3.2$ cat t_mpi_question-11.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60

int main(int ac,char **av)
{
  int rank,size,cc,j,i,buf;
  MPI_Request req;
  MPI_Status sts;
  MPI_Comm localcomm,intercomm;
  MPI_Group worldgrp,localgrp;
  int local_grp_size,localrank,localsize,interrank,intersize;
  int *rank_list;
  int local_leader,remote_leader;

  rank=0;
  MPI_Init(&ac,&av);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  if (size%2 != 0) { MPI_Abort(MPI_COMM_WORLD,-1); }

  printf("   rank=%d pass-1 \n",rank); fflush(stdout);
  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Comm_group(MPI_COMM_WORLD,&worldgrp);

  local_grp_size = size / 2;
  rank_list = (int *)malloc(sizeof(int) * local_grp_size);
  if (rank_list == NULL) { MPI_Abort(MPI_COMM_WORLD,-1); }

  j = ((rank % 2) == 0) ? 0 : 1;
  for (i=0;i
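  [The archived program is cut off here. A sketch of an even/odd-split
  inter-communicator setup consistent with the variables declared above;
  this is my reconstruction, not the original code:]

    /* Reconstruction sketch, not the original: split MPI_COMM_WORLD into
     * even and odd ranks, then connect the two halves. */
    for (i=0;i<local_grp_size;i++) {
      rank_list[i] = 2*i + j;          /* even world ranks if j==0, odd if j==1 */
    }
    MPI_Group_incl(worldgrp,local_grp_size,rank_list,&localgrp);
    MPI_Comm_create(MPI_COMM_WORLD,localgrp,&localcomm);
    local_leader  = 0;                 /* leader within each local communicator */
    remote_leader = (j == 0) ? 1 : 0;  /* world rank of the other group's leader */
    MPI_Intercomm_create(localcomm,local_leader,MPI_COMM_WORLD,
                         remote_leader,999,&intercomm);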

Re: [OMPI devel] Some questions about checkpoint/restart (9)

2010-04-02 Thread Takayuki Seki


>  1) t_mpi_question-9-packunpack.c

I did not include the program in the 9th mail.

The program is as follows:

-bash-3.2$ cat t_mpi_question-9-packunpack.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "mpi.h"

#define SLPTIME 60

#define WORKBUFSIZ 64

struct dd {
  int  x;
  int  a;
  int  y;
  int  b;
  int  c;
};

int main(int ac,char **av)
{
  int rank,size,cc,j,i;
  MPI_Request req;
  MPI_Status sts;
  struct dd buf,ans_dd_buf;
  char workbuf[WORKBUFSIZ];

  MPI_Init(&ac,&av);

  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);

  if (rank == 0) {
    buf.x = 1;
    buf.a = 4329;
    buf.y = 2;
    buf.b = 8474;
    buf.c = 48;
  }
  else {
    buf.x = 0;
    buf.a = 0;
    buf.y = 0;
    buf.b = 0;
    buf.c = 0;
  }
  ans_dd_buf.x = 1;
  ans_dd_buf.a = 4329;
  ans_dd_buf.y = 2;
  ans_dd_buf.b = 8474;
  ans_dd_buf.c = 48;

  j=0; /* position */
  memset((void *)&workbuf[0],0,WORKBUFSIZ);

  if (rank == 0) {
    cc=MPI_Pack(&buf.x,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Pack(&buf.a,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Pack(&buf.y,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Pack(&buf.b,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Pack(&buf.c,1,MPI_INT,&workbuf[0],WORKBUFSIZ,&j,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Isend(&workbuf[0],j,MPI_PACKED,1,1000,MPI_COMM_WORLD,&req);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf(" rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Wait(&req,&sts);
  }
  else {
    MPI_Barrier(MPI_COMM_WORLD);
    printf(" rank=%d sleep start \n",rank); fflush(stdout);
    sleep(SLPTIME);
    printf(" rank=%d sleep end   \n",rank); fflush(stdout);
    MPI_Irecv(&workbuf[0],WORKBUFSIZ,MPI_PACKED,0,1000,MPI_COMM_WORLD,&req);
    MPI_Wait(&req,&sts);
    cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.x,1,MPI_INT,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.a,1,MPI_INT,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.y,1,MPI_INT,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.b,1,MPI_INT,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc=MPI_Unpack(&workbuf[0],WORKBUFSIZ,&j,&buf.c,1,MPI_INT,MPI_COMM_WORLD);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
  }

  printf("   rank=%d pass-3 %d %d %d %d %d \n"
,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);
  if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
  if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
  if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
  if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
  if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }

  MPI_Finalize();
  if (rank ==0) {
printf("   rank=%d program end \n",rank); fflush(stdout);
  }
  return(0);
}