Hello, I am working on a fluid-mechanical code to solve the two-phase Navier?Stokes equations with levelset interface capturing. I have been asked to replace the pressure calculation which uses the SOR and Jacobi iterative schemes with a Krylov subspace method. I have done this and the code is working but as I have never used PETSc before I would like to know if improvements to my code, or the run time parameters that I am using, could be made.
I am using GMRES with a Block Jacobi pre-conditioner. I have tried Conjugate Gradient with a Block Jacobi pre-conditioner but it diverges. If I use GMRES for the first few thousand time steps and then swap to CG it does converge but the speed of execution is somewhat reduced. I have attached relevant excerpts from the code. Yours sincerely, David Scott -- Dr. D. M. Scott Applications Consultant Edinburgh Parallel Computing Centre Tel. 0131 650 5921 The University of Edinburgh is a charitable body, registered in Scotland, with registration number SC005336. -------------- next part -------------- program mainprogram use petsc use pressure_solver implicit none #include "finclude/petscdef.h" ! Many declarations deleted. PetscInt :: dof,stencil_width parameter (dof = 1, stencil_width = 1) DM :: da KSP :: ksp PetscInt :: petsc_y_ranges(0:num_procs_x-1) PetscReal :: rtol, abstol, dtol PetscInt :: maxits, its KSPType :: solver_type PC :: pc PCType :: pc_type PCSide :: pc_side double precision :: div_grad_p, residual MatNullSpace :: nullspace PetscReal :: rnorm ! **************************************************************************************** ! MPI stuff call PetscInitialize(PETSC_NULL_CHARACTER,ierr) call MPI_Comm_size(PETSC_COMM_WORLD,num_procs,ierr) call MPI_Comm_rank(PETSC_COMM_WORLD,my_id,ierr) if(my_id==0)then write(*,*) 'num_procs=', num_procs if(num_procs_x*num_procs_y*num_procs_z/=num_procs)then write(*,*) 'Error 1: domain decomposition inconsistent with number of processors, exiting' stop end if if(num_procs_z/=1)then write(*,*) 'Error 2: domain decomposition inconsistent with number of processors, exiting' stop end if if((mod(maxl-1,num_procs_x)/=0).or.(mod(maxm-1,num_procs_y)/=0))then write(*,*) 'Error 3: number of processors must evenly divide (grid dimension-1), exiting' stop end if end if dims(1) = num_procs_x dims(2) = num_procs_y dims(3) = num_procs_z periodic(1) = .false. periodic(2) = .true. periodic(3) = .false. reorder = .false. Call mpi_cart_create(PETSC_COMM_WORLD,Ndim,dims,periodic,reorder,comm2d_quasiperiodic,ierr) Call mpi_comm_rank( comm2d_quasiperiodic,my_id,ierr) Call mpi_cart_coords(comm2d_quasiperiodic,my_id,Ndim,coords,ierr) Call get_mpi_neighbours(neighbours,comm2d_quasiperiodic) call mpi_decomp_2d(sx,ex,sy,ey,n_local_x,n_local_y,maxl,maxm,coords,dims,Ndim) petsc_y_ranges = n_local_x petsc_y_ranges(0) = n_local_x + 1 petsc_y_ranges(num_procs_x-1) = n_local_x + 1 call DMDACreate3d(PETSC_COMM_WORLD, & DMDA_BOUNDARY_PERIODIC, & DMDA_BOUNDARY_NONE, & DMDA_BOUNDARY_NONE, & DMDA_STENCIL_BOX, & global_dim_x, global_dim_y+2, global_dim_z+2, & num_procs_y, num_procs_x, num_procs_z, & dof, stencil_width, & PETSC_NULL_INTEGER, & petsc_y_ranges, & PETSC_NULL_INTEGER, & da, ierr) call KSPCreate(PETSC_COMM_WORLD, ksp, ierr) call KSPSetFromOptions(ksp, ierr) call DMSetInitialGuess(da, copy_pressure_in_to_petsc, ierr) call KSPSetComputeRHS(ksp, compute_rhs, PETSC_NULL_OBJECT, ierr) call KSPSetComputeOperators(ksp, compute_matrix, PETSC_NULL_OBJECT, ierr) call KSPSetDM(ksp, da, ierr) call MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, PETSC_NULL_OBJECT, nullspace, ierr) call KSPSetNullSpace(ksp, nullspace, ierr) call MatNullSpaceDestroy(nullspace, ierr) call calculate_indices(da, sx, sy, ex, ey, n_local_x, n_local_y, my_id, ierr) ! Initialisation code deleted. ! Time loop do iteration=1,n_timesteps ! Velocity calculations (including RHS_p) deleted. call KSPSolve(ksp, PETSC_NULL_OBJECT, PETSC_NULL_OBJECT, ierr) if (my_id==60) then call output_converged_reason(ksp) call KSPGetResidualNorm(ksp, rnorm, ierr) write(*, *) 'approximate, preconditioned, residual norm', rnorm call KSPGetIterationNumber(ksp, its, ierr) write(*, *) 'iterations =', its end if call copy_pressure_out_of_petsc(ksp, ierr) ! Some more velocity stuff deleted. end do call PetscFinalize(ierr) end program mainprogram -------------- next part -------------- module pressure_solver use petsc implicit none #include "finclude/petscdef.h" PetscInt :: maxl, maxm, maxn parameter (maxl = 481, maxm = 153, maxn = 153) ! Note that the first two dimensions are swapped so that as far as PETSc is concerned ! it is the first dimension that is periodic. PetscInt :: global_dim_x, global_dim_y, global_dim_z parameter (global_dim_x = maxm-1, global_dim_y = maxl-1, global_dim_z = maxn-1) PetscInt :: x_start, y_start, z_start, x_width, y_width, z_width PetscInt :: x_max, y_max, z_max PetscInt :: x_start_ghost, y_start_ghost, z_start_ghost PetscInt :: x_width_ghost, y_width_ghost, z_width_ghost PetscInt :: x_max_ghost, y_max_ghost, z_max_ghost PetscScalar, allocatable, dimension(:, :, :) :: pres PetscScalar :: dx, dy, dz, dt PetscScalar, allocatable, dimension(:, :, :) :: RHS_p, u3 PetscScalar :: u_inlet(0:global_dim_z-1) contains subroutine calculate_indices(da, sx, sy, ex, ey, n_local_x, n_local_y, rank, ierr) implicit none DM :: da PetscInt :: sx, sy, ex, ey, n_local_x, n_local_y, rank PetscInt :: ierr call DMDAGetCorners(da, x_start, y_start, z_start, x_width, y_width, z_width, ierr) x_max = x_start + x_width - 1 y_max = y_start + y_width - 1 z_max = z_start + z_width - 1 if (sx==1 .AND. ((sx-1).NE.y_start .OR. ex/=y_max)) then write(*, *) 'ID1', rank, 'sx-1, y_start, ex, y_max, n_local_x, y_width', sx-1, y_start, ex, y_max, n_local_x, y_width end if if (ex==(maxl-1) .AND. (sx.NE.y_start .OR. (ex+1)/=y_max)) then write(*, *) 'ID2', rank, 'sx, y_start, ex+1, y_max, n_local_x, y_width', sx, y_start, ex+1, y_max, n_local_x, y_width end if if ((sx/=1 .AND. ex/=(maxl-1)) .AND. (sx/=y_start .OR. ex/=y_max)) then write(*, *) 'ID3', rank, 'sx, y_start, ex, y_max, n_local_x, y_width', sx, y_start, ex, y_max, n_local_x, y_width end if if (sy/=(x_start+1) .OR. ey/=(x_max+1)) then write(*, *) 'ID4', rank, 'sy, x_start, ey, x_max, n_local_y, x_width', sy, x_start, ey, x_max, n_local_y, x_width end if call DMDAGetGhostCorners(da, x_start_ghost, y_start_ghost, z_start_ghost, & x_width_ghost, y_width_ghost, z_width_ghost, ierr) x_max_ghost = x_start_ghost + x_width_ghost - 1 y_max_ghost = y_start_ghost + y_width_ghost - 1 z_max_ghost = z_start_ghost + z_width_ghost - 1 if ((sx-1)/=y_start_ghost .OR. (ex+1)/=y_max_ghost) then write(*, *) 'GHOST1', rank, 'sx-1, y_start_ghost, ex+1, y_max_ghost', & sx-1, y_start_ghost, ex+1, y_max_ghost end if if ((sy-1)/=(x_start_ghost+1) .OR. (ey+1)/=(x_max_ghost+1)) then write(*, *) 'GHOST2', rank, 'sy-1, x_start_ghost+1, ey+1, x_max_ghost+1', & sy-1, x_start_ghost+1, ey+1, x_max_ghost+1 end if if (z_start/=z_start_ghost .OR. z_width/=z_width_ghost) then write(*, *) 'GHOST3', rank, 'z_start, z_start_ghost, z_width, z_width_ghost', & z_start, z_start_ghost, z_width, z_width_ghost end if end subroutine calculate_indices subroutine copy_pressure_in_to_petsc(dm, x, ierr) implicit none ! This routine copies the data generated by the other parts of the TPLS code into ! locations where they can be accessed by PETSc. ! The global pressure array in the non-PETSc code is pres_global(0:maxl, 0:maxm, 0:maxn). ! This includes a bounday layer on all sides even though the second dimension is periodic ! as the periodic structure is implemented by copying data one interior face of the cuboid ! to the opposite face in the boundary layer. ! So the interior is (1:maxl-1, 1:maxm-1, 1:maxn-1). ! The indexing of the data in the PETSc code is different for two reasons. ! * PETSc lays out data on processes differently from the way that it is done in the ! non-PETSc code which necessitates swapping of the first two dimensions. ! * PETSc is instructed to impose the periodic boundary condition behind the scenes ! (through specifying DMDA_BOUNDARY_PERIODIC for the appropriate dimesnion). The ! ghost points that are required to do this are not visible in the global vector, ! but they do appear in the local vectors. ! Consequently the interior in the PETSc code is (0:maxm-2, 1:maxl-1, 1:maxn-1) ! which may written as (0:global_dim_x-1, 1:global_dim_y, 1:global_dim_z) ! Including the explicit boundaries we have ! (0:global_dim_x-1, 0:global_dim_y+1, 0:global_dim_z+1) or (0:maxm-2, 0:maxl, 0:maxn) ! Including the ghost points supplied by PETSc we have ! (-1:global_dim_x, 0:global_dim_y+1, 0:global_dim_z+1) or (-1:maxm-1, 0:maxl, 0:maxn) ! Let a pressure datum have coordinates (i1, j1, k1) in the non-PETSc code and ! (i2, j2, k2) in the PETSc code, then ! i2 = j1 - 1 ! j2 = i1 ! k2 = k1, or ! i1 = j2 ! j1 = i2+1 ! k1 = k2. DM, intent(inout) :: dm Vec, intent(inout) :: x ! x is a global vector. PetscErrorCode, intent(inout) :: ierr PetscScalar, pointer, dimension(:, :, :) :: x_3da PetscInt :: i, j, k call DMDAVecGetArrayF90(dm, x, x_3da, ierr) do k = z_start, z_max do j = y_start, y_max do i = x_start, x_max x_3da(i, j, k) = pres(j, i+1, k) end do end do end do call DMDAVecRestoreArrayF90(dm, x, x_3da, ierr) end subroutine copy_pressure_in_to_petsc subroutine compute_rhs(ksp, b, dummy, ierr) implicit none KSP, intent(inout) :: ksp Vec, intent(inout) :: b ! b is a global vector. integer, intent(inout) :: dummy(*) PetscErrorCode, intent(inout) :: ierr PetscScalar, pointer, dimension(:, :, :) :: b_3da PetscInt :: i, j, k DM :: dm call KSPGetDM(ksp, dm, ierr) call DMDAVecGetArrayF90(dm, b, b_3da, ierr) do k = z_start, z_max do j = y_start, y_max do i = x_start, x_max b_3da(i, j, k) = -RHS_p(j, i+1, k) end do end do end do if (y_start==0) then do k = 1, global_dim_z do i = x_start, x_max b_3da(i, 0, k) = (dx/dt)*(u_inlet(k-1)-u3(0, i ,k-1))/dy end do end do end if call DMDAVecRestoreArrayF90(dm, b, b_3da, ierr) end subroutine compute_rhs subroutine compute_matrix(ksp, A, B, str, dummy, ierr) implicit none KSP, intent(inout) :: ksp Mat, intent(inout) :: A, B MatStructure, intent(inout) :: str integer, intent(inout) :: dummy(*) PetscErrorCode, intent(inout) :: ierr PetscInt :: i, j, k PetscScalar :: v(7) MatStencil :: row(4), col(4, 7) do k = z_start, z_max do j = y_start, y_max do i = x_start, x_max row(MatStencil_i) = i row(MatStencil_j) = j row(MatStencil_k) = k ! Deal with the edges of the cuboid. ! The edges with i=0 and i=(global_dim_x-1) are taken care of by the periodic boundary condition. if ((j==0 .AND. k==0) .OR. & (j==(global_dim_y+1) .AND. k==0)) then v(1) = 1/dz col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k v(2) = -1/dz col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j col(MatStencil_k, 2) = k+1 call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr) else if ((j==0 .AND. k==(global_dim_z+1)) .OR. & (j==(global_dim_y+1) .AND. k==(global_dim_z+1))) then v(1) = -1/dz col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k v(2) = 1/dz col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j col(MatStencil_k, 2) = k-1 call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr) ! Deal with the faces of the cuboid, excluding the edges and the corners. ! The faces i=0 and i=(global_dim_x-1) are taken care of by the periodic boundary condition. else if (j==0) then ! Von Neumann boundary condition on y=0 boundary. v(1) = 1/dy col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k v(2) = -1/dy col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j+1 col(MatStencil_k, 2) = k call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr) else if (j==(global_dim_y+1)) then ! Von Neumann boundary condition on y=(global_dim_y+1) boundary. v(1) = -1/dy col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k v(2) = 1/dy col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j-1 col(MatStencil_k, 2) = k call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr) else if (k==0) then ! Von Neumann boundary condition on z=0 boundary. v(1) = 1/dz col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k v(2) = -1/dz col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j col(MatStencil_k, 2) = k+1 call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr) else if (k==(global_dim_z+1)) then ! Von Neumann boundary conditions on z=(global_dim_z+1) boundary. v(1) = -1/dz col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k v(2) = 1/dz col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j col(MatStencil_k, 2) = k-1 call MatSetValuesStencil(B, 1, row, 2, col, v, INSERT_VALUES, ierr) else ! Deal with the interior. ! Laplacian in 3D. v(1) = -1/dz**2 col(MatStencil_i, 1) = i col(MatStencil_j, 1) = j col(MatStencil_k, 1) = k-1 v(2) = -1/dy**2 col(MatStencil_i, 2) = i col(MatStencil_j, 2) = j-1 col(MatStencil_k, 2) = k v(3) = -1/dx**2 col(MatStencil_i, 3) = i-1 col(MatStencil_j, 3) = j col(MatStencil_k, 3) = k v(4) = 2*(1/dx**2+1/dy**2+1/dz**2) col(MatStencil_i, 4) = i col(MatStencil_j, 4) = j col(MatStencil_k, 4) = k v(5) = -1/dx**2 col(MatStencil_i, 5) = i+1 col(MatStencil_j, 5) = j col(MatStencil_k, 5) = k v(6) = -1/dy**2 col(MatStencil_i, 6) = i col(MatStencil_j, 6) = j+1 col(MatStencil_k, 6) = k v(7) = -1/dz**2 col(MatStencil_i, 7) = i col(MatStencil_j, 7) = j col(MatStencil_k, 7) = k+1 call MatSetValuesStencil(B, 1, row, 7, col, v, INSERT_VALUES, ierr) end if end do end do end do call MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY, ierr) call MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY, ierr) str = SAME_NONZERO_PATTERN if (A.ne.B) then call MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY, ierr) call MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY, ierr) endif end subroutine compute_matrix subroutine copy_pressure_out_of_petsc(ksp, ierr) implicit none KSP, intent(in) :: ksp PetscErrorCode, intent(out) :: ierr DM :: dm Vec :: global_vector Vec :: local_vector PetscScalar, pointer, dimension(:, :, :) :: local_vector_3da, global_vector_3da PetscInt :: i, j, k call KSPGetDM(ksp, dm, ierr) call KSPGetSolution(ksp, global_vector, ierr) call DMCreateLocalVector(dm, local_vector, ierr) call DMGlobalToLocalBegin(dm, global_vector, INSERT_VALUES, local_vector, ierr) call DMGlobalToLocalEnd(dm, global_vector, INSERT_VALUES, local_vector, ierr) call DMDAVecGetArrayF90(dm, local_vector, local_vector_3da, ierr) call DMDAVecGetArrayF90(dm, global_vector, global_vector_3da, ierr) do k = z_start_ghost, z_max_ghost do j = y_start_ghost, y_max_ghost do i = x_start_ghost, x_max_ghost pres(j, i+1, k) = local_vector_3da(i, j, k) end do end do end do call DMDAVecRestoreArrayF90(dm, global_vector, global_vector_3da, ierr) call DMDAVecRestoreArrayF90(dm, local_vector, local_vector_3da, ierr) ! The following call is required to prevent a memory leak. call VecDestroy(local_vector, ierr) end subroutine copy_pressure_out_of_petsc end module pressure_solver