7th question is as follows:
(7) The result of a communication that uses a derived datatype after
taking a checkpoint is incorrect.
Framework : crcp
Component : bkmrk
The source file : ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
The function name : traffic_message_append
Framework : datatype
The source file : ompi/datatype/datatype.h
The function name : ompi_ddt_type_size
Here's the code that causes the problem:
/* Record that is sent as a single derived datatype; its members are
 * char, float, char, float, int, in that order. */
struct dd {
char x;
float a;
char y;
float b;
int c;
};
/* buf is the send/receive buffer; ans_dd_buf holds the expected values. */
struct dd buf,ans_dd_buf;
/* Rank 0 owns the payload; every other rank starts with a zeroed buffer. */
if (rank == 0) {
buf.x = (char)1;
buf.a = (float)4329.1003;
buf.y = (char)2;
buf.b = (float)8474.73;
buf.c = (int)48;
}
else {
buf.x = (char)0;
buf.a = (float)0;
buf.y = (char)0;
buf.b = (float)0;
buf.c = (int)0;
}
/* Reference copy of rank 0's payload, compared against buf at the end. */
ans_dd_buf.x = (char)1;
ans_dd_buf.a = (float)4329.1003;
ans_dd_buf.y = (char)2;
ans_dd_buf.b = (float)8474.73;
ans_dd_buf.c = (int)48;
/* number of items in each block */
b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
/* datatype of each block: the two char members are described as MPI_BYTE */
dt[0] = dt[2] = MPI_BYTE;
dt[1] = dt[3] = MPI_FLOAT;
dt[4] = MPI_INT;
/* displacement of each block, measured from the address of buf.x */
dp[0] = 0;
MPI_Address(&buf.x,&st);
MPI_Address(&buf.a,&cr);
dp[1] = (cr - st);
MPI_Address(&buf.y,&cr);
dp[2] = (cr - st);
MPI_Address(&buf.b,&cr);
dp[3] = (cr - st);
MPI_Address(&buf.c,&cr);
dp[4] = (cr - st);
/* Build and commit the derived datatype; abort the job on failure. */
cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
cc = MPI_Type_commit(&newdt);
if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
MPI_Barrier(MPI_COMM_WORLD);
/* Print each member offset computed from raw addresses next to the
 * displacement stored in dp[] (shown in brackets).
 * NOTE(review): the format string below is split across two lines,
 * presumably by mail wrapping; it must be rejoined before compiling.
 * NOTE(review): dp[] is declared as MPI_Aint in the full listing but is
 * printed with %d here -- confirm the specifier matches on the target. */
printf(" rank=%d pass-1 x->x =%d[%d] x->a=%d[%d] x->y=%d[%d] x->b=
%d[%d] x->c=%d[%d]\n"
,rank
,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),dp[0]
,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),dp[1]
,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),dp[2]
,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),dp[3]
,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),dp[4]
);
fflush(stdout);
/* Rank 0 posts the send before sleeping, so the message is outstanding
 * when the checkpoint is taken; the receiver posts its receive only
 * after the sleep. */
if (rank == 0) {
MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
printf(" rank=%d sleep start \n",rank); fflush(stdout);
sleep(SLPTIME); /** take checkpoint at this point **/
printf(" rank=%d sleep end \n",rank); fflush(stdout);
MPI_Wait(&req,&sts);
MPI_Type_free(&newdt);
}
else { /* rank 1 */
printf(" rank=%d sleep start \n",rank); fflush(stdout);
sleep(SLPTIME); /** take checkpoint at this point **/
printf(" rank=%d sleep end \n",rank); fflush(stdout);
MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
MPI_Wait(&req,&sts);
MPI_Type_free(&newdt);
}
/* Compare every member with the expected value; abort on any mismatch. */
if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); } /* The error occurs at this point */
if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }
* Take the checkpoint while Rank 0 and Rank 1 are executing the sleep
function.
* Construct the derived datatype from the structure dd.
* I think that the memory layout of the derived datatype is as
follows:
1111111111
01234567890123456789
--------------------
X###AAAAY###BBBBCCCC
--------------------
### means space.
* The ddt_size field (documented as /** Quick reference to the size of
the datatype */) of the ompi_crcp_bkmrk_pml_traffic_message_ref_t
structure is obtained by calling the ompi_ddt_type_size function in the
traffic_message_append function.
if( NULL != datatype ) {
ompi_ddt_type_size(datatype,
&ddt_size);
* I think that the returned value of ddt_size is wrong.
The obtained value is 14. (Does it mean that the total size in memory is
14 bytes?)
struct dd {
char x; -> character is 1 byte.
float a; -> float is 4 bytes.
char y; -> character is 1 byte.
float b; -> float is 4 bytes.
int c; -> integer is 4 bytes.
};
* But the returned value of ddt_size should be 20 bytes, considering
the memory mapping.
* Rank 1 receives a message of only 14 bytes in bkmrk.
As a result, the wrong values are obtained.
* t_mpi_question-7-ng.c : the error occurs.
Here's my debugging output.
ft_event_post_drain_message:Irecv drain_msg_ref=c89200 rank=0
tag=1000 cnt=1 ddt=14 to=c929b0 [datatype->size=1]
wait_quiesce_drained: x=1 a=142658605493679655240073216.000000 y=4
b=0.000000 c=32
/* 14bytes data is received, it is incorrect. values are wrong. */
drain_message_check_recv:datatype->size=1 14 count=1 1
ompi_ddt_copy_content_same_ddt:Start size=14 flag=102/4 count=1
/* DT_FLAG_CONTIGUOUS is false. */
* t_mpi_question-7-ok.c : the error does not occur.
Here's my debugging output.
ft_event_post_drain_message:Irecv drain_msg_ref=a51280 rank=0
tag=1000 cnt=1 ddt=20 to=a5b6b0 [datatype->size=1]
wait_quiesce_drained: x=1 a=4329.100098 y=2 b=8474.730469 c=48
/* 20bytes data is received correctly. */
drain_message_check_recv:datatype->size=1 20 count=1 1
ompi_ddt_copy_content_same_ddt:Start size=20 flag=186/4 count=1
/* DT_FLAG_CONTIGUOUS is true. */
* difference list
-bash-3.2$ diff -c t_mpi_question-7-ng.c t_mpi_question-7-ok.c
*** t_mpi_question-7-ng.c Fri Feb 26 13:07:05 2010
--- t_mpi_question-7-ok.c Fri Feb 26 13:20:25 2010
***************
*** 8,16 ****
#define ITEMNUM 5
struct dd {
! char x;
float a;
! char y;
float b;
int c;
};
--- 8,16 ----
#define ITEMNUM 5
struct dd {
! int x;
float a;
! int y;
float b;
int c;
};
***************
*** 31,52 ****
MPI_Comm_size(MPI_COMM_WORLD,&size);
if (rank == 0) {
! buf.x = (char)1;
buf.a = (float)4329.1003;
! buf.y = (char)2;
buf.b = (float)8474.73;
buf.c = (int)48;
}
else {
! buf.x = (char)0;
buf.a = (float)0;
! buf.y = (char)0;
buf.b = (float)0;
buf.c = (int)0;
}
! ans_dd_buf.x = (char)1;
ans_dd_buf.a = (float)4329.1003;
! ans_dd_buf.y = (char)2;
ans_dd_buf.b = (float)8474.73;
ans_dd_buf.c = (int)48;
--- 31,52 ----
MPI_Comm_size(MPI_COMM_WORLD,&size);
if (rank == 0) {
! buf.x = (int)1;
buf.a = (float)4329.1003;
! buf.y = (int)2;
buf.b = (float)8474.73;
buf.c = (int)48;
}
else {
! buf.x = (int)0;
buf.a = (float)0;
! buf.y = (int)0;
buf.b = (float)0;
buf.c = (int)0;
}
! ans_dd_buf.x = (int)1;
ans_dd_buf.a = (float)4329.1003;
! ans_dd_buf.y = (int)2;
ans_dd_buf.b = (float)8474.73;
ans_dd_buf.c = (int)48;
***************
*** 54,60 ****
b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
/* datatype per a block */
! dt[0] = dt[2] = MPI_BYTE;
dt[1] = dt[3] = MPI_FLOAT;
dt[4] = MPI_INT;
--- 54,60 ----
b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;
/* datatype per a block */
! dt[0] = dt[2] = MPI_INT;
dt[1] = dt[3] = MPI_FLOAT;
dt[4] = MPI_INT;
-bash-3.2$ cat t_mpi_question-7-ng.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"
#define SLPTIME 60
#define ITEMNUM 5
/*
 * Payload record for the failing ("ng") reproducer.
 *
 * The char members are each followed by a float; the report's memory
 * map shows unused bytes after x and after y on the reporter's platform.
 */
struct dd {
    char  x;    /* described to MPI as MPI_BYTE  */
    float a;    /* described to MPI as MPI_FLOAT */
    char  y;    /* described to MPI as MPI_BYTE  */
    float b;    /* described to MPI as MPI_FLOAT */
    int   c;    /* described to MPI as MPI_INT   */
};
/*
 * Reproducer (failing variant): sends one struct dd from rank 0 to
 * rank 1 as a derived datatype while a checkpoint is taken during the
 * sleep on both ranks.
 *
 * Written for a two-rank run: rank 0 sends to rank 1, every other rank
 * receives from rank 0.
 *
 * Returns 0 on success. Calls MPI_Abort if the datatype cannot be
 * created/committed or if any received member differs from the
 * expected value.
 */
int main(int ac,char **av)
{
    int rank,size,cc;
    MPI_Request req;
    MPI_Status sts;
    struct dd buf,ans_dd_buf;
    int b_l[ITEMNUM];
    MPI_Aint dp[ITEMNUM],st,cr;
    MPI_Datatype dt[ITEMNUM],newdt;

    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);

    /* Rank 0 owns the payload; every other rank starts zeroed. */
    if (rank == 0) {
        buf.x = (char)1;
        buf.a = (float)4329.1003;
        buf.y = (char)2;
        buf.b = (float)8474.73;
        buf.c = (int)48;
    }
    else {
        buf.x = (char)0;
        buf.a = (float)0;
        buf.y = (char)0;
        buf.b = (float)0;
        buf.c = (int)0;
    }

    /* Expected values, compared against buf after the transfer. */
    ans_dd_buf.x = (char)1;
    ans_dd_buf.a = (float)4329.1003;
    ans_dd_buf.y = (char)2;
    ans_dd_buf.b = (float)8474.73;
    ans_dd_buf.c = (int)48;

    /* number of items in each block */
    b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;

    /* datatype of each block */
    dt[0] = dt[2] = MPI_BYTE;
    dt[1] = dt[3] = MPI_FLOAT;
    dt[4] = MPI_INT;

    /* displacement of each block, measured from the address of buf.x */
    dp[0] = 0;
    MPI_Address(&buf.x,&st);
    MPI_Address(&buf.a,&cr);
    dp[1] = (cr - st);
    MPI_Address(&buf.y,&cr);
    dp[2] = (cr - st);
    MPI_Address(&buf.b,&cr);
    dp[3] = (cr - st);
    MPI_Address(&buf.c,&cr);
    dp[4] = (cr - st);

    cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc = MPI_Type_commit(&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }

    MPI_Barrier(MPI_COMM_WORLD);

    /*
     * Print each member offset computed from raw addresses next to the
     * displacement stored in dp[] (in brackets).  dp[] has type
     * MPI_Aint, not int, so it is cast to long and printed with %ld.
     * The format string is kept on a single line: a string literal
     * cannot contain a raw line break.
     */
    printf(" rank=%d pass-1 x->x =%d[%ld] x->a=%d[%ld] x->y=%d[%ld] x->b=%d[%ld] x->c=%d[%ld]\n"
           ,rank
           ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),(long)dp[0]
           ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),(long)dp[1]
           ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),(long)dp[2]
           ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),(long)dp[3]
           ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),(long)dp[4]
           );
    fflush(stdout);

    if (rank == 0) {
        /* The send is posted before the sleep so that it is still
         * outstanding when the checkpoint is taken. */
        MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);     /* the checkpoint is taken during this sleep */
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Wait(&req,&sts);
        MPI_Type_free(&newdt);
    }
    else {
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);     /* the checkpoint is taken during this sleep */
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        /* The receive is posted only after the sleep. */
        MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
        MPI_Wait(&req,&sts);
        MPI_Type_free(&newdt);
    }

    printf(" rank=%d pass-2 %d %f %d %f %d \n"
           ,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);

    /* Exact comparison is intended: the bytes are expected to arrive
     * unchanged.  Any mismatch aborts the whole job. */
    if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }

    cc = MPI_Finalize();
    if (rank ==0) {
        printf(" rank=%d program end \n",rank); fflush(stdout);
    }
    return(0);
}
-bash-3.2$ cat t_mpi_question-7-ok.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "mpi.h"
#define SLPTIME 60
#define ITEMNUM 5
/*
 * Payload record for the passing ("ok") reproducer.
 *
 * Same member names as the failing variant, but x and y are int
 * instead of char.
 */
struct dd {
    int   x;    /* described to MPI as MPI_INT   */
    float a;    /* described to MPI as MPI_FLOAT */
    int   y;    /* described to MPI as MPI_INT   */
    float b;    /* described to MPI as MPI_FLOAT */
    int   c;    /* described to MPI as MPI_INT   */
};
/*
 * Reproducer (passing variant): identical to the failing program except
 * that the x and y members are int and are described as MPI_INT.  One
 * struct dd is sent from rank 0 to rank 1 as a derived datatype while a
 * checkpoint is taken during the sleep on both ranks.
 *
 * Written for a two-rank run: rank 0 sends to rank 1, every other rank
 * receives from rank 0.
 *
 * Returns 0 on success. Calls MPI_Abort if the datatype cannot be
 * created/committed or if any received member differs from the
 * expected value.
 */
int main(int ac,char **av)
{
    int rank,size,cc;
    MPI_Request req;
    MPI_Status sts;
    struct dd buf,ans_dd_buf;
    int b_l[ITEMNUM];
    MPI_Aint dp[ITEMNUM],st,cr;
    MPI_Datatype dt[ITEMNUM],newdt;

    MPI_Init(&ac,&av);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);
    MPI_Comm_size(MPI_COMM_WORLD,&size);

    /* Rank 0 owns the payload; every other rank starts zeroed. */
    if (rank == 0) {
        buf.x = (int)1;
        buf.a = (float)4329.1003;
        buf.y = (int)2;
        buf.b = (float)8474.73;
        buf.c = (int)48;
    }
    else {
        buf.x = (int)0;
        buf.a = (float)0;
        buf.y = (int)0;
        buf.b = (float)0;
        buf.c = (int)0;
    }

    /* Expected values, compared against buf after the transfer. */
    ans_dd_buf.x = (int)1;
    ans_dd_buf.a = (float)4329.1003;
    ans_dd_buf.y = (int)2;
    ans_dd_buf.b = (float)8474.73;
    ans_dd_buf.c = (int)48;

    /* number of items in each block */
    b_l[0] = b_l[1] = b_l[2] = b_l[3] = b_l[4] = 1;

    /* datatype of each block */
    dt[0] = dt[2] = MPI_INT;
    dt[1] = dt[3] = MPI_FLOAT;
    dt[4] = MPI_INT;

    /* displacement of each block, measured from the address of buf.x */
    dp[0] = 0;
    MPI_Address(&buf.x,&st);
    MPI_Address(&buf.a,&cr);
    dp[1] = (cr - st);
    MPI_Address(&buf.y,&cr);
    dp[2] = (cr - st);
    MPI_Address(&buf.b,&cr);
    dp[3] = (cr - st);
    MPI_Address(&buf.c,&cr);
    dp[4] = (cr - st);

    cc = MPI_Type_struct(ITEMNUM,&b_l[0],&dp[0],&dt[0],&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }
    cc = MPI_Type_commit(&newdt);
    if (cc != MPI_SUCCESS) { MPI_Abort(MPI_COMM_WORLD,-1); }

    MPI_Barrier(MPI_COMM_WORLD);

    /*
     * Print each member offset computed from raw addresses next to the
     * displacement stored in dp[] (in brackets).  dp[] has type
     * MPI_Aint, not int, so it is cast to long and printed with %ld.
     * The format string is kept on a single line: a string literal
     * cannot contain a raw line break.
     */
    printf(" rank=%d pass-1 x->x =%d[%ld] x->a=%d[%ld] x->y=%d[%ld] x->b=%d[%ld] x->c=%d[%ld]\n"
           ,rank
           ,( (int)((unsigned long)(&buf.x) - (unsigned long)(&buf.x)) ),(long)dp[0]
           ,( (int)((unsigned long)(&buf.a) - (unsigned long)(&buf.x)) ),(long)dp[1]
           ,( (int)((unsigned long)(&buf.y) - (unsigned long)(&buf.x)) ),(long)dp[2]
           ,( (int)((unsigned long)(&buf.b) - (unsigned long)(&buf.x)) ),(long)dp[3]
           ,( (int)((unsigned long)(&buf.c) - (unsigned long)(&buf.x)) ),(long)dp[4]
           );
    fflush(stdout);

    if (rank == 0) {
        /* The send is posted before the sleep so that it is still
         * outstanding when the checkpoint is taken. */
        MPI_Isend(&buf,1,newdt,1,1000,MPI_COMM_WORLD,&req);
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);     /* the checkpoint is taken during this sleep */
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        MPI_Wait(&req,&sts);
        MPI_Type_free(&newdt);
    }
    else {
        printf(" rank=%d sleep start \n",rank); fflush(stdout);
        sleep(SLPTIME);     /* the checkpoint is taken during this sleep */
        printf(" rank=%d sleep end \n",rank); fflush(stdout);
        /* The receive is posted only after the sleep. */
        MPI_Irecv(&buf,1,newdt,0,1000,MPI_COMM_WORLD,&req);
        MPI_Wait(&req,&sts);
        MPI_Type_free(&newdt);
    }

    printf(" rank=%d pass-2 %d %f %d %f %d \n"
           ,rank,buf.x,buf.a,buf.y,buf.b,buf.c); fflush(stdout);

    /* Exact comparison is intended: the bytes are expected to arrive
     * unchanged.  Any mismatch aborts the whole job. */
    if (ans_dd_buf.x != buf.x) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.a != buf.a) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.y != buf.y) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.b != buf.b) { MPI_Abort(MPI_COMM_WORLD,1); }
    if (ans_dd_buf.c != buf.c) { MPI_Abort(MPI_COMM_WORLD,1); }

    cc = MPI_Finalize();
    if (rank ==0) {
        printf(" rank=%d program end \n",rank); fflush(stdout);
    }
    return(0);
}
_______________________________________________
devel mailing list
de...@open-mpi.org
http://www.open-mpi.org/mailman/listinfo.cgi/devel