You can also have the processes with no values print an array of length zero. 
Like

   if (rank3 == PROC_ROW) then ! IF mpi PROCESS OWNS THIS ROW THEN ..
      ..
   else 
      NO_A_ENTRIES = 0
      call PetscIntView(NO_A_ENTRIES,JALOC(1:NO_A_ENTRIES),              &
     &       PETSC_VIEWER_STDOUT_WORLD, ierr_pets)

> On May 20, 2021, at 5:31 AM, Matthew Knepley <[email protected]> wrote:
> 
> On Thu, May 20, 2021 at 5:32 AM dazza simplythebest <[email protected] 
> <mailto:[email protected]>> wrote:
> Dear Jose,
>                  Many thanks for the prompt explanation - that would 
> definitely explain what is going on,
> I will adjust my code accordingly.
> 
> If you want to print different things from each process in parallel, I suggest
> 
>   
> https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Sys/PetscSynchronizedPrintf.html
>  
> <https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Sys/PetscSynchronizedPrintf.html>
> 
>   Thanks,
> 
>      Matt
>  
>         Thanks again,
>                  Dan.
> 
> From: Jose E. Roman <[email protected] <mailto:[email protected]>>
> Sent: Thursday, May 20, 2021 9:06 AM
> To: dazza simplythebest <[email protected] <mailto:[email protected]>>
> Cc: PETSc users list <[email protected] 
> <mailto:[email protected]>>
> Subject: Re: [petsc-users] Code hangs when calling PetscIntView (MPI, fortran)
>  
> If you look at the manpage 
> https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Sys/PetscIntView.html
>  
> <https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Sys/PetscIntView.html>
>  you will see that PetscIntView() is collective. This means that all MPI 
> processes must call this function, so it is forbidden to call it within an IF 
> rank==...
> 
> Jose
> 
> > El 20 may 2021, a las 10:25, dazza simplythebest <[email protected] 
> > <mailto:[email protected]>> escribió:
> > 
> > Dear All,
> >              As part of preparing a code to call the SLEPC eigenvalue 
> > solving library,
> > I am constructing a matrix in sparse CSR format row-by-row. Just for 
> > debugging 
> > purposes I write out the column values for a given row, which are stored in 
> > a 
> > PetscInt allocatable vector, using PetscIntView.
> > 
> > Everything works fine when the number of MPI processes exactly divide the
> > number of rows of the matrix, and so each process owns the same number of 
> > rows.
> > However, when the number of MPI processes does not exactly divide the
> > number of rows of the matrix, and so each process owns a different number 
> > of rows,
> > the code hangs when it reaches the line that calls PetscIntView.
> > To be precise the code hangs on the final row that a process, other than 
> > root, owns.
> > If I however comment out the call to PetscIntView the code completes 
> > without error,
> >  and produces the correct eigenvalues (hence we are not missing a row / 
> > miswriting a row).
> >    Note also that a simple direct writeout of this same array using a plain 
> > fortran command
> > will write out the array without problem.
> > 
> > I have attached below a small code that reproduces the problem.
> > For this code we have nominally assigned 200 rows to our matrix. The code 
> > runs without
> > problem using 1,2,4,5,8 or 10 MPI processes, all of which precisely divide 
> > 200,
> >  but will hang for 3 MPI processes for example.
> > For the case of 3 MPI processes the subroutine WHOSE_ROW_IS_IT allocates 
> > the rows 
> > to each process as :
> >   process no       first row           last row       no. of rows
> >    0                             1                     66               66
> >    1                            67                   133             67
> >    2                          134                   200             67
> >    
> > The code will hang when process 1 calls PetscIntView for its last row, row 
> > 133 for example.
> > 
> > One piece of additional information that may be relevant is that the code 
> > does run to completion 
> >  without hanging if I comment out the final slepc/MPI finalisation command
> >  CALL SlepcFinalize(ierr_pets) 
> > (I of course get 'bad termination' errors, but otherwise the run is 
> > successful.)
> >  
> >  I would appreciate it if anyone has any ideas on what is going wrong!
> >   Many thanks,
> >                        Dan.
> > 
> > 
> > code:
> > 
> >       MODULE ALL_STAB_ROUTINES
> >       IMPLICIT NONE
> >       CONTAINS
> > 
> >       SUBROUTINE WHOSE_ROW_IS_IT(ROW_NO, TOTAL_NO_ROWS, NO_PROCESSES,     &
> >      &      OWNER)
> > !     THIS ROUTINE ALLOCATES ROWS EVENLY BETWEEN mpi PROCESSES
> > #include <slepc/finclude/slepceps.h>
> >       use slepceps
> >       IMPLICIT NONE
> >       PetscInt, INTENT(IN) :: ROW_NO, TOTAL_NO_ROWS, NO_PROCESSES
> >       PetscInt, INTENT(OUT) :: OWNER
> >       PetscInt :: P, REM
> > 
> >       P = TOTAL_NO_ROWS / NO_PROCESSES ! NOTE INTEGER DIVISION
> >       REM = TOTAL_NO_ROWS - P*NO_PROCESSES
> >       IF (ROW_NO < (NO_PROCESSES - REM)*P + 1 ) THEN
> >         OWNER = (ROW_NO - 1)/P ! NOTE INTEGER DIVISION
> >       ELSE
> >         OWNER = (  ROW_NO  +   NO_PROCESSES - REM -1 )/(P+1) ! NOTE INTEGER 
> > DIVISION
> >       ENDIF    
> >       END SUBROUTINE WHOSE_ROW_IS_IT
> >       END MODULE ALL_STAB_ROUTINES
> > 
> > 
> >       PROGRAM trialer
> >       USE MPI
> > #include <slepc/finclude/slepceps.h>
> >       use slepceps
> >       USE ALL_STAB_ROUTINES
> >       IMPLICIT NONE
> >       PetscMPIInt    rank3, total_mpi_size
> >       PetscInt nl3, code,  PROC_ROW, ISTATUS, jm, N_rows,NO_A_ENTRIES
> >       PetscInt, ALLOCATABLE, DIMENSION(:) :: JALOC
> >       PetscInt, PARAMETER  ::  ZERO = 0 , ONE = 1, TWO = 2, THREE = 3  
> >       PetscErrorCode ierr_pets
> >      
> > ! Initialise slepc/mpi
> >       call SlepcInitialize(PETSC_NULL_CHARACTER,ierr_pets) ! note that this 
> > initialises MPI
> >       call MPI_COMM_SIZE(MPI_COMM_WORLD, total_mpi_size, ierr_pets) !! find 
> > total no of MPI processes  
> >       nL3= total_mpi_size
> >       call MPI_COMM_RANK(MPI_COMM_WORLD,rank3,ierr_pets) !! find my overall 
> > rank -> rank3
> >       write(*,*)'Welcome: PROCESS NO , TOTAL NO. OF PROCESSES =  ',rank3, 
> > nl3
> >  
> >       N_rows = 200 ! NUMBER OF ROWS OF A NOTIONAL MATRIX
> >       NO_A_ENTRIES = 12 ! NUMBER OF ENTRIES FOR JALOC
> >      
> > !     LOOP OVER ROWS      
> >       do jm = 1, N_rows
> >  
> >       CALL whose_row_is_it(JM,  N_rows , NL3, PROC_ROW) ! FIND OUT WHICH 
> > PROCESS OWNS ROW
> >       if (rank3 == PROC_ROW) then ! IF mpi PROCESS OWNS THIS ROW THEN ..
> > !       ALLOCATE jaloc ARRAY AND INITIALISE
> >        
> >         allocate(jaloc(NO_A_ENTRIES), STAT=ISTATUS )
> >         jaloc = three
> >        
> >        
> >         WRITE(*,*)'JALOC',JALOC ! THIS SIMPLE PLOT ALWAYS WORKS
> >         write(*,*)'calling PetscIntView: PROCESS NO. ROW NO.',rank3, jm
> >         ! THIS CALL TO PetscIntView CAUSES CODE TO HANG WHEN E.G. 
> > total_mpi_size=3, JM=133
> >         call PetscIntView(NO_A_ENTRIES,JALOC(1:NO_A_ENTRIES),              &
> >      &       PETSC_VIEWER_STDOUT_WORLD, ierr_pets)
> >         CHKERRA(ierr_pets)
> >         deallocate(jaloc)
> >       endif
> >       enddo
> > 
> >       CALL SlepcFinalize(ierr_pets)
> >       end program trialer
> 
> 
> 
> -- 
> What most experimenters take for granted before they begin their experiments 
> is infinitely more interesting than any results to which their experiments 
> lead.
> -- Norbert Wiener
> 
> https://www.cse.buffalo.edu/~knepley/ <http://www.cse.buffalo.edu/~knepley/>

Reply via email to