I don't have any problem running your test program; see below.

I think someone suggested in either this thread or a different thread that the 
Ethernet driver you have might be faulty...? (I could be remembering that 
incorrectly.)  Have you verified that your network stack is working properly in 
all cases?

-----
[17:50] svbu-mpi:~/mpi % cat big-send.c 
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Send one 2 GB message from rank 0 to rank 1 and report progress on stdout.
 *
 * The payload is described to MPI as 1024*1024 elements of a 2048-byte
 * contiguous datatype (2048 * 1024 * 1024 bytes == Gsize), which keeps the
 * element count passed to MPI_Send/MPI_Recv small.
 *
 * Ranks other than 0 and 1 take no part in the transfer.  The test needs at
 * least two processes; with fewer, no transfer is attempted and the program
 * exits with a failure status.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated or
 * fewer than two processes were started.
 */
int main(int argc, char** argv)
{
    int localID;
    int numOfPros;
    int rc = 0;
    /* 2GB */
    size_t Gsize = (size_t)2 * 1024 * 1024 * 1024;

    /* Allocate before MPI_Init so a failure can bail out without any MPI
       cleanup.  The buffer contents are never inspected, so they are left
       uninitialized. */
    char* g = malloc(Gsize);
    if (NULL == g) {
        fprintf(stderr, "Unable to allocate %zu bytes\n", Gsize);
        return EXIT_FAILURE;
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numOfPros);
    MPI_Comm_rank(MPI_COMM_WORLD, &localID);

    /* 2048-byte element type used to describe the buffer in large chunks */
    MPI_Datatype MPI_Type_lkchar;
    MPI_Type_contiguous(2048, MPI_BYTE, &MPI_Type_lkchar);
    MPI_Type_commit(&MPI_Type_lkchar);

    if (numOfPros < 2) {
        /* Rank 0 would otherwise send to a rank that does not exist */
        fprintf(stderr, "This test needs at least 2 processes\n");
        rc = EXIT_FAILURE;
    } else if (localID == 0) {
        printf("Sending...\n");
        MPI_Send(g, 1024*1024, MPI_Type_lkchar, 1, 1, MPI_COMM_WORLD);
        printf("Sent!\n");
    } else if (1 == localID) {
        MPI_Status status;
        printf("Receiving...\n");
        MPI_Recv(g, 1024*1024, MPI_Type_lkchar, 0, 1,  
                 MPI_COMM_WORLD, &status);
        printf("Received!\n");
    }

    printf("Rank %d all done\n", localID);

    /* Release the derived datatype and the buffer before shutting down */
    MPI_Type_free(&MPI_Type_lkchar);
    MPI_Finalize();
    free(g);
    return rc;
}
[17:50] svbu-mpi:~/mpi % mpicc big-send.c -o big-send -g
[17:50] svbu-mpi:~/mpi % mpirun -np 2 --bynode hostname
svbu-mpi017
svbu-mpi018
[17:50] svbu-mpi:~/mpi % mpirun --mca btl tcp,self -np 2 --bynode big-send
Receiving...
Sending...
Sent!
Rank 0 all done
Received!
Rank 1 all done
[17:51] svbu-mpi:~/mpi % 
-----

Note that it did take a few seconds to run over 1Gb Ethernet.




On Dec 2, 2010, at 5:09 AM, 孟宪军 wrote:

> hi all,
> 
> I met a question recently when I tested the MPI_send and MPI_Recv functions.  
> When I run the following codes, the  processes hanged and I found there was 
> not data transmission in my network at all. 
> 
> BTW: I finished this test on two X86-64 computers with 16GB memory and 
> installed Linux.
> 
>   1 #include <stdio.h>
>   2 #include <mpi.h>
>   3 #include <stdlib.h>
>   4 #include <unistd.h>
>   5 
>   6 
>   7 int main(int argc, char** argv)
>   8 {
>   9     int localID;
>  10     int numOfPros;
>  11     size_t Gsize = (size_t)2 * 1024 * 1024 * 1024;
>  12 
>  13     char* g = (char*)malloc(Gsize);
>  14 
>  15     MPI_Init(&argc, &argv);
>  16     MPI_Comm_size(MPI_COMM_WORLD, &numOfPros);
>  17     MPI_Comm_rank(MPI_COMM_WORLD, &localID);
>  18 
>  19     MPI_Datatype MPI_Type_lkchar;
>  20     MPI_Type_contiguous(2048, MPI_BYTE, &MPI_Type_lkchar);
>  21     MPI_Type_commit(&MPI_Type_lkchar);
>  22 
>  23     if (localID == 0)
>  24     {
>  25         MPI_Send(g, 1024*1024, MPI_Type_lkchar, 1, 1, MPI_COMM_WORLD);
>  26     }
>  27 
>  28     if (localID != 0)
>  29     {
>  30         MPI_Status status;
>  31         MPI_Recv(g, 1024*1024, MPI_Type_lkchar, 0, 1, \
>  32                 MPI_COMM_WORLD, &status);
>  33     }
>  34 
>  35     MPI_Finalize();
>  36 
>  37     return 0;
>  38 }
> 
> Thanks
> Jun
> _______________________________________________
> devel mailing list
> de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/devel


-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/


Reply via email to