Anthony, This crash is in PETSc code before it calls the SuperLU_DIST numeric factorization; likely we have a mistake such as assuming a process has at least one row of the matrix and need to fix it.
Barry > 0x00007fe6ba609297 in MatLUFactorNumeric_SuperLU_DIST (F=0x1922b50, > A=0x14a6a70, info=0x19099f8) > at > /home/anthony/LIB/petsc-3.6.1/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c:368 > 368 colA_start = rstart + ajj[0]; /* the smallest global col index > of A */ > On Aug 10, 2015, at 3:50 PM, Anthony Haas <a...@email.arizona.edu> wrote: > > Hi Sherry, > > I recently submitted a matrix for which I noticed that Superlu_dist was > hanging when running on 4 processors with parallel symbolic factorization. I > have been using the latest version of Superlu_dist and the code is not > hanging anymore. However, I noticed that when running the same matrix (I have > attached the matrix), the code crashes with the following SIGSEGV when > running on 10 procs (with or without parallel symbolic factorization). It is > probably overkill to run such a 'small' matrix on 10 procs but I thought that > it might still be useful to report the problem?? See below for the error > obtained when running with gdb and also a code snippet to reproduce the error. > > Thanks, > > > Anthony > > > > 1) ERROR in GDB > > Program received signal SIGSEGV, Segmentation fault. > 0x00007fe6ba609297 in MatLUFactorNumeric_SuperLU_DIST (F=0x1922b50, > A=0x14a6a70, info=0x19099f8) > at > /home/anthony/LIB/petsc-3.6.1/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c:368 > 368 colA_start = rstart + ajj[0]; /* the smallest global col index > of A */ > (gdb) > > > > 2) PORTION OF CODE TO REPRODUCE ERROR > > Subroutine HowBigLUCanBe(rank) > > IMPLICIT NONE > > integer(i4b),intent(in) :: rank > integer(i4b) :: i,ct > real(dp) :: begin,endd > complex(dpc) :: sigma > > PetscErrorCode ierr > > > if (rank==0) call cpu_time(begin) > > if (rank==0) then > write(*,*) > write(*,*)'Testing How Big LU Can Be...' > write(*,*)'============================' > write(*,*) > endif > > !sigma = (1.0d0,0.0d0) > !call MatAXPY(A,-sigma,B,DIFFERENT_NONZERO_PATTERN,ierr) ! on exit A = > A-sigma*B > > !call MatView(A,PETSC_VIEWER_STDOUT_WORLD,ierr) > > !.....Write Matrix to ASCII and Binary Format > !call PetscViewerASCIIOpen(PETSC_COMM_WORLD,"Amat.m",viewer,ierr) > !call MatView(DXX,viewer,ierr) > !call PetscViewerDestroy(viewer,ierr) > > !call > PetscViewerBinaryOpen(PETSC_COMM_WORLD,"Amat_binary.m",FILE_MODE_WRITE,viewer,ierr) > !call MatView(A,viewer,ierr) > !call PetscViewerDestroy(viewer,ierr) > > !...Load a Matrix in Binary Format > call > PetscViewerBinaryOpen(PETSC_COMM_WORLD,"Amat_binary.m",FILE_MODE_READ,viewer,ierr) > call MatCreate(PETSC_COMM_WORLD,DLOAD,ierr) > call MatSetType(DLOAD,MATAIJ,ierr) > call MatLoad(DLOAD,viewer,ierr) > call PetscViewerDestroy(viewer,ierr) > > !call MatView(DLOAD,PETSC_VIEWER_STDOUT_WORLD,ierr) > > > !.....Create Linear Solver Context > call KSPCreate(PETSC_COMM_WORLD,ksp,ierr) > > !.....Set operators. Here the matrix that defines the linear system also > serves as the preconditioning matrix. > !call KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN,ierr) !aha > commented and replaced by next line > > !call KSPSetOperators(ksp,A,A,ierr) ! remember: here A = A-sigma*B > call KSPSetOperators(ksp,DLOAD,DLOAD,ierr) ! remember: here A = A-sigma*B > > !.....Set Relative and Absolute Tolerances and Uses Default for Divergence Tol > tol = 1.e-10 > call > KSPSetTolerances(ksp,tol,tol,PETSC_DEFAULT_REAL,PETSC_DEFAULT_INTEGER,ierr) > > !.....Set the Direct (LU) Solver > call KSPSetType(ksp,KSPPREONLY,ierr) > call KSPGetPC(ksp,pc,ierr) > call PCSetType(pc,PCLU,ierr) > call PCFactorSetMatSolverPackage(pc,MATSOLVERSUPERLU_DIST,ierr) ! > MATSOLVERSUPERLU_DIST MATSOLVERMUMPS > > !.....Create Right-Hand-Side Vector > !call MatCreateVecs(A,frhs,PETSC_NULL_OBJECT,ierr) > !call MatCreateVecs(A,sol,PETSC_NULL_OBJECT,ierr) > > call MatCreateVecs(DLOAD,frhs,PETSC_NULL_OBJECT,ierr) > call MatCreateVecs(DLOAD,sol,PETSC_NULL_OBJECT,ierr) > > call MatGetOwnershipRange(DLOAD,IstartA,IendA,ierr)!;CHKERRQ(ierr) > > allocate(xwork1(IendA-IstartA)) > allocate(loc(IendA-IstartA)) > > ct=0 > do i=IstartA,IendA-1 > ct=ct+1 > loc(ct)=i > xwork1(ct)=(1.0d0,0.0d0) > enddo > > call VecSetValues(frhs,IendA-IstartA,loc,xwork1,INSERT_VALUES,ierr) > call VecZeroEntries(sol,ierr) > > deallocate(xwork1,loc) > > !.....Assemble Vectors > call VecAssemblyBegin(frhs,ierr) > call VecAssemblyEnd(frhs,ierr) > > !.....Solve the Linear System > call KSPSolve(ksp,frhs,sol,ierr) > > !call VecView(sol,PETSC_VIEWER_STDOUT_WORLD,ierr) > > if (rank==0) then > call cpu_time(endd) > write(*,*) > print '("Total time for HowBigLUCanBe = ",f21.3," > seconds.")',endd-begin > endif > > call SlepcFinalize(ierr) > > STOP > > > end Subroutine HowBigLUCanBe > > <Amat_binary.m.info>