Anthony,

   This crash is in PETSc code, before it calls the SuperLU_DIST numeric 
factorization. We most likely have a bug there, such as assuming that every 
process owns at least one row of the matrix, and we need to fix it.

   Barry

> 0x00007fe6ba609297 in MatLUFactorNumeric_SuperLU_DIST (F=0x1922b50,
>    A=0x14a6a70, info=0x19099f8)
>    at /home/anthony/LIB/petsc-3.6.1/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c:368
> 368           colA_start = rstart + ajj[0]; /* the smallest global col index of A */


    
> On Aug 10, 2015, at 3:50 PM, Anthony Haas <a...@email.arizona.edu> wrote:
> 
> Hi Sherry,
> 
> I recently submitted a matrix for which I noticed that Superlu_dist was 
> hanging when running on 4 processors with parallel symbolic factorization. I 
> have since switched to the latest version of Superlu_dist and the code no 
> longer hangs. However, I noticed that when running the same matrix (attached) 
> on 10 procs, the code crashes with the following SIGSEGV (with or without 
> parallel symbolic factorization). It is probably overkill to run such a 
> 'small' matrix on 10 procs, but I thought that it might still be useful to 
> report the problem. See below for the error obtained when running with gdb 
> and also a code snippet to reproduce the error.
> 
> Thanks,
> 
> 
> Anthony
> 
> 
> 
> 1) ERROR in GDB
> 
> Program received signal SIGSEGV, Segmentation fault.
> 0x00007fe6ba609297 in MatLUFactorNumeric_SuperLU_DIST (F=0x1922b50,
>    A=0x14a6a70, info=0x19099f8)
>    at /home/anthony/LIB/petsc-3.6.1/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c:368
> 368           colA_start = rstart + ajj[0]; /* the smallest global col index of A */
> (gdb)
> 
> 
> 
> 2) PORTION OF CODE TO REPRODUCE ERROR
> 
>    Subroutine HowBigLUCanBe(rank)
> 
>      IMPLICIT NONE
> 
>      integer(i4b),intent(in) :: rank
>      integer(i4b)            :: i,ct
>      real(dp)                :: begin,endd
>      complex(dpc)            :: sigma
> 
>      PetscErrorCode ierr
> 
> 
>      if (rank==0) call cpu_time(begin)
> 
>      if (rank==0) then
>         write(*,*)
>         write(*,*)'Testing How Big LU Can Be...'
>         write(*,*)'============================'
>         write(*,*)
>      endif
> 
>      !sigma = (1.0d0,0.0d0)
>      !call MatAXPY(A,-sigma,B,DIFFERENT_NONZERO_PATTERN,ierr) ! on exit A = 
> A-sigma*B
> 
>      !call MatView(A,PETSC_VIEWER_STDOUT_WORLD,ierr)
> 
> !.....Write Matrix to ASCII and Binary Format
>      !call PetscViewerASCIIOpen(PETSC_COMM_WORLD,"Amat.m",viewer,ierr)
>      !call MatView(DXX,viewer,ierr)
>      !call PetscViewerDestroy(viewer,ierr)
> 
>      !call 
> PetscViewerBinaryOpen(PETSC_COMM_WORLD,"Amat_binary.m",FILE_MODE_WRITE,viewer,ierr)
>      !call MatView(A,viewer,ierr)
>      !call PetscViewerDestroy(viewer,ierr)
> 
> !...Load a Matrix in Binary Format
>      call 
> PetscViewerBinaryOpen(PETSC_COMM_WORLD,"Amat_binary.m",FILE_MODE_READ,viewer,ierr)
>      call MatCreate(PETSC_COMM_WORLD,DLOAD,ierr)
>      call MatSetType(DLOAD,MATAIJ,ierr)
>      call MatLoad(DLOAD,viewer,ierr)
>      call PetscViewerDestroy(viewer,ierr)
> 
>      !call MatView(DLOAD,PETSC_VIEWER_STDOUT_WORLD,ierr)
> 
> 
> !.....Create Linear Solver Context
>      call KSPCreate(PETSC_COMM_WORLD,ksp,ierr)
> 
> !.....Set operators. Here the matrix that defines the linear system also 
> serves as the preconditioning matrix.
>      !call KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN,ierr) !aha 
> commented and replaced by next line
> 
>      !call KSPSetOperators(ksp,A,A,ierr) ! remember: here A = A-sigma*B
>      call KSPSetOperators(ksp,DLOAD,DLOAD,ierr) ! remember: here A = A-sigma*B
> 
> !.....Set Relative and Absolute Tolerances and Uses Default for Divergence Tol
>      tol = 1.e-10
>      call 
> KSPSetTolerances(ksp,tol,tol,PETSC_DEFAULT_REAL,PETSC_DEFAULT_INTEGER,ierr)
> 
> !.....Set the Direct (LU) Solver
>      call KSPSetType(ksp,KSPPREONLY,ierr)
>      call KSPGetPC(ksp,pc,ierr)
>      call PCSetType(pc,PCLU,ierr)
>      call PCFactorSetMatSolverPackage(pc,MATSOLVERSUPERLU_DIST,ierr) ! 
> MATSOLVERSUPERLU_DIST MATSOLVERMUMPS
> 
> !.....Create Right-Hand-Side Vector
>      !call MatCreateVecs(A,frhs,PETSC_NULL_OBJECT,ierr)
>      !call MatCreateVecs(A,sol,PETSC_NULL_OBJECT,ierr)
> 
>      call MatCreateVecs(DLOAD,frhs,PETSC_NULL_OBJECT,ierr)
>      call MatCreateVecs(DLOAD,sol,PETSC_NULL_OBJECT,ierr)
> 
>      call MatGetOwnershipRange(DLOAD,IstartA,IendA,ierr)!;CHKERRQ(ierr)
> 
>      allocate(xwork1(IendA-IstartA))
>      allocate(loc(IendA-IstartA))
> 
>      ct=0
>      do i=IstartA,IendA-1
>         ct=ct+1
>         loc(ct)=i
>         xwork1(ct)=(1.0d0,0.0d0)
>      enddo
> 
>      call VecSetValues(frhs,IendA-IstartA,loc,xwork1,INSERT_VALUES,ierr)
>      call VecZeroEntries(sol,ierr)
> 
>      deallocate(xwork1,loc)
> 
> !.....Assemble Vectors
>      call VecAssemblyBegin(frhs,ierr)
>      call VecAssemblyEnd(frhs,ierr)
> 
> !.....Solve the Linear System
>      call KSPSolve(ksp,frhs,sol,ierr)
> 
>      !call VecView(sol,PETSC_VIEWER_STDOUT_WORLD,ierr)
> 
>      if (rank==0) then
>         call cpu_time(endd)
>         write(*,*)
>         print '("Total time for HowBigLUCanBe = ",f21.3," 
> seconds.")',endd-begin
>      endif
> 
>      call SlepcFinalize(ierr)
> 
>      STOP
> 
> 
>    end Subroutine HowBigLUCanBe
> 
> <Amat_binary.m.info>

Reply via email to