Retrying, as the last version ended up as HTML garbage... :-(
***
Hi Andre,
I've tried to work out a (rather old-style) program that structurally resembles your testcase, but uses plain MPI, with mpi_put for one-sided communication as you suggested. It does not use associate, so as to sidestep potential issues with buffers and asynchronous communication (see another recent PR) or with aliasing. It works with any number of processes that is a perfect square. One can play with it and see that it works only if one uses the right communicators etc. See attached.
Can you see how that maps to the ways you want to use teams? If it is possible to have a testcase that works with at least one other compiler, that would be a real benefit.
Cheers,
Harald
!> Two-stage gather of one integer per MPI process onto world rank 0, using
!> one-sided communication (mpi_put) on two different communicators.
!>
!> The size_world = n*n processes are split into n groups ("columns") of n
!> processes each.  Every process computes one value.  Stage (1) collects the
!> n values of a column on that column's rank 0 through a window created on
!> the column communicator; stage (2) collects the n column buffers on world
!> rank 0 through a window created on the world communicator.  World rank 0
!> finally prints the assembled array.
program p
  use mpi
  use, intrinsic :: iso_fortran_env, only: error_unit
  implicit none

  integer, parameter :: bits_per_byte = 8
  ! Size of a default integer in bytes, as a *default-kind* integer: this is
  ! the kind MPI expects for the disp_unit argument of mpi_win_create.
  integer, parameter :: int_bytes = storage_size(1) / bits_per_byte

  integer :: comm_world, comm_col, win_world, win_col
  integer :: rank_world, size_world, rank_col, size_col
  integer :: i, ierr, icol, jcol, n
  integer(MPI_ADDRESS_KIND) :: disp, winsize
  integer, allocatable, target :: data(:), col_data(:)
  integer, pointer, contiguous :: parr(:,:)

  ! initialize
  call mpi_init (ierr)
  ! MPI is not usable if initialization failed, so mpi_abort is not an option.
  if (ierr /= MPI_SUCCESS) error stop 1
  comm_world = MPI_COMM_WORLD
  call mpi_comm_size (comm_world, size_world, ierr)
  call check (ierr, "mpi_comm_size")
  call mpi_comm_rank (comm_world, rank_world, ierr)
  call check (ierr, "mpi_comm_rank")
  if (rank_world == 0) then
    write(error_unit,*) "Total number of processes:", size_world
  end if

  ! partition into n*n processes (n columns)
  n = nint (sqrt (real (size_world)))
  if (n*n /= size_world) call fail ("num pes must be a squared integer", 1)
  icol = rank_world / n
  jcol = mod (rank_world, n)

  ! create communicator for n groups of n processes (columns);
  ! icol selects the group, jcol is the key that orders ranks within it
  call mpi_comm_split (comm_world, icol, jcol, comm_col, ierr)
  call check (ierr, "mpi_comm_split")
  call mpi_comm_size (comm_col, size_col, ierr)
  call check (ierr, "mpi_comm_size")
  call mpi_comm_rank (comm_col, rank_col, ierr)
  call check (ierr, "mpi_comm_rank")
  if (size_col /= n) call fail ("unexpected size of column communicator", 2)
  if (rank_col /= jcol) call fail ("unexpected rank in column communicator", 3)

  ! prepare the "real work": parr is a zero-based n-by-n view of data
  allocate (data(size_world), source=-1)
  parr(0:n-1,0:n-1) => data

  ! Let each process do its work...
  do i = 0, size_world-1
    call mpi_barrier (comm_world, ierr) ! for ordered output
    call check (ierr, "mpi_barrier")
    if (rank_world == i) write(error_unit,*) "rank", i,': rank_col=', jcol
  end do
  parr(rank_col,icol) = (rank_col+1) + 100*(icol+1) ! the "result"

  ! ...and gather the results:
  ! (1) column-wise using an auxiliary array for each column
  !     create MPI window for 1-sided communication; only rank 0 of each
  !     column exposes memory, all other ranks attach an empty window
  if (rank_col == 0) then
    allocate (col_data(0:n-1), source=-42)
    winsize = int (n, MPI_ADDRESS_KIND) * int_bytes
  else
    allocate (col_data(0))
    winsize = 0
  end if
  call mpi_barrier (comm_world, ierr)
  call check (ierr, "mpi_barrier")
  call mpi_win_create (col_data, winsize, int_bytes, &
       MPI_INFO_NULL, comm_col, win_col, ierr)
  call check (ierr, "mpi_win_create")
  call mpi_win_fence (0, win_col, ierr)
  call check (ierr, "mpi_win_fence")
  ! displacement is counted in units of int_bytes, i.e. in integers
  disp = rank_col
  call mpi_put (parr(rank_col, icol), 1, MPI_INTEGER, &
       0, disp, 1, MPI_INTEGER, win_col, ierr)
  call check (ierr, "mpi_put")
  call mpi_win_fence (0, win_col, ierr)
  call check (ierr, "mpi_win_fence")
  call mpi_win_free (win_col, ierr)
  call check (ierr, "mpi_win_free")

  ! (2) gather columns: create MPI window for 1-sided communication;
  !     only world rank 0 exposes memory
  if (rank_world == 0) then
    winsize = int (size (parr), MPI_ADDRESS_KIND) * int_bytes
  else
    winsize = 0
  end if
  call mpi_win_create (parr, winsize, int_bytes, &
       MPI_INFO_NULL, comm_world, win_world, ierr)
  call check (ierr, "mpi_win_create")
  call mpi_win_fence (0, win_world, ierr)
  call check (ierr, "mpi_win_fence")
  if (rank_col == 0) then
    ! column icol starts icol*n integers into the target array
    disp = icol*n
    call mpi_put (col_data, size(col_data), MPI_INTEGER, &
         0, disp, size(col_data), MPI_INTEGER, win_world, ierr)
    call check (ierr, "mpi_put")
  end if
  call mpi_win_fence (0, win_world, ierr)
  call check (ierr, "mpi_win_fence")
  call mpi_win_free (win_world, ierr)
  call check (ierr, "mpi_win_free")
  deallocate (col_data)

  ! show results on processor 0:
  if (rank_world == 0) then
    write(error_unit,*) "data=", data
  end if

  ! release everything this program created before shutting MPI down
  call mpi_comm_free (comm_col, ierr)
  call check (ierr, "mpi_comm_free")
  nullify (parr)
  deallocate (data)
  call mpi_finalize (ierr)

contains

  !> Abort the whole MPI job if an MPI call returned an error status.
  !> stat: status returned by the MPI routine; what: name of that routine.
  subroutine check (stat, what)
    integer,          intent(in) :: stat
    character(len=*), intent(in) :: what
    if (stat /= MPI_SUCCESS) call fail (what // " failed", stat)
  end subroutine check

  !> Print msg to the error unit and abort all processes with error code.
  subroutine fail (msg, code)
    character(len=*), intent(in) :: msg
    integer,          intent(in) :: code
    integer :: abort_stat
    write(error_unit,'(a)') msg
    call mpi_abort (MPI_COMM_WORLD, code, abort_stat)
    ! fallback in case mpi_abort returns
    error stop 1
  end subroutine fail

end program p
