I am trying to solve a generalized symmetric eigensystem problem using
SLEPc, and I am getting much different eigenvalues compared with the
dense LAPACK solver DSYGVD. I have attached a minimal working example
(MWE). The code relies on the external libraries GSL, Lapack and Lapacke.
The matrices (A,B) come from discretizing the Laplacian in spherical
coordinates using a radial basis function method. Here are the
eigenvalues computed by the dense LAPACK solver:
=== LAPACK eigenvalues ===
eval(0) = 1.309567289750e+00
eval(1) = 1.648674331144e+00
eval(2) = 1.709879393690e+00
eval(3) = 1.709879393690e+00
eval(4) = 2.142210251638e+00
eval(5) = 2.142210251638e+00
eval(6) = 2.145726457896e+00
eval(7) = 2.216526311324e+00
eval(8) = 2.216526311324e+00
eval(9) = 2.695837030236e+00
Here are the eigenvalues computed by SLEPc, using the command:
$ ./mwe -eps_nev 10 -eps_smallest_magnitude -eps_tol 1e-8
Number of requested eigenvalues: 10
Linear eigensolve converged (10 eigenpairs) due to CONVERGED_TOL;
iterations 196
---------------------- --------------------
k ||Ax-kBx||/||kx||
---------------------- --------------------
0.094520 906.031
0.177268 363.291
1.324501 61.9577
1.786472 35.7082
2.187857 32.8364
-2.886905 27.7314
2.899206 24.2224
4.195222 20.3007
4.787192 12.8346
7.221589 9.58513
---------------------- --------------------
As we can see, the results are quite different than LAPACK and the error
norms in the right column are quite large. Using a stricter tolerance
(eps_tol) does not seem to help. Can anyone suggest how to diagnose and
fix the problem?
/*
* laplace3d.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <slepceps.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_spmatrix.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_eigen.h>
#include <gsl/gsl_permutation.h>
#include <gsl/gsl_permute_vector.h>
#include <lapacke/lapacke.h>
#include "le_laplace3d.h"
static double compute_distance(const gsl_vector * a, const gsl_vector * b);
static int slepc_eigen_gsymm(const gsl_spmatrix * A, const gsl_spmatrix * B, gsl_vector * eval);
static double rbf_wendlandC4_phi(const double r, const double c);
static double rbf_wendlandC4_laplacian(const double r, const double c);
static int lapack_dsygvd(gsl_matrix * A, gsl_matrix * B, gsl_vector * eval);
#define RBF_C (2.0)
/*
laplace3d_alloc()
Allocate a laplace3d workspace
Inputs: dim - dimensions of grid, length 3
*/
laplace3d_workspace *
laplace3d_alloc(const size_t dim[])
{
laplace3d_workspace * w;
size_t i;
w = calloc(1, sizeof(laplace3d_workspace));
if (!w)
{
GSL_ERROR_NULL ("failed to allocate workspace", GSL_ENOMEM);
}
for (i = 0; i < 3; ++i)
w->dim[i] = dim[i];
w->N = dim[0] * dim[1] * dim[2];
w->As = gsl_spmatrix_alloc(w->N, w->N);
w->Ls = gsl_spmatrix_alloc(w->N, w->N);
w->A = gsl_matrix_alloc(w->N, w->N);
w->L = gsl_matrix_alloc(w->N, w->N);
w->Alpha = gsl_matrix_alloc(w->N, w->N);
w->eval = gsl_vector_alloc(w->N);
w->work = gsl_vector_alloc(w->N);
return w;
}
void
laplace3d_free(laplace3d_workspace * w)
{
if (w->As)
gsl_spmatrix_free(w->As);
if (w->Ls)
gsl_spmatrix_free(w->Ls);
if (w->A)
gsl_matrix_free(w->A);
if (w->L)
gsl_matrix_free(w->L);
if (w->Alpha)
gsl_matrix_free(w->Alpha);
if (w->eval)
gsl_vector_free(w->eval);
if (w->work)
gsl_vector_free(w->work);
free(w);
}
/*
laplace3d_proc()
Compute eigenfunctions of Laplacian on a given domain
Inputs: X - grid points specifying domain, N-by-3
X(i,:) = Cartesian (X,Y,Z) vector of grid point
w - workspace
Notes:
1) On output, w->Alpha contain the eigenmode coefficients,
u_j(x) = \sum_i \alpha_{ij} \phi(||x - x_i||)
*/
int
laplace3d_proc(const gsl_matrix * X, laplace3d_workspace * w)
{
const size_t N = w->N;
if (X->size1 != N || X->size2 != 3)
{
GSL_ERROR ("X matrix has wrong size", GSL_EBADLEN);
}
else
{
int status;
const double c = RBF_C;
size_t i, j;
double min_rij = 1.0e8, max_rij = -1.0e8;
size_t nnz = 0, nnz_d2 = 0;
fprintf(stderr, "laplace3d_proc: constructing A and L matrices...");
for (i = 0; i < N; ++i)
{
gsl_vector_const_view Xi = gsl_matrix_const_row(X, i);
for (j = 0; j <= i; ++j)
{
gsl_vector_const_view Xj = gsl_matrix_const_row(X, j);
double rij = (i == j) ? 0.0 : compute_distance(&Xi.vector, &Xj.vector);
double phi = rbf_wendlandC4_phi(rij, c);
double d2phi = rbf_wendlandC4_laplacian(rij, c);
gsl_matrix_set(w->A, i, j, phi);
gsl_matrix_set(w->L, i, j, -d2phi);
if (phi != 0.0)
gsl_spmatrix_set(w->As, i, j, phi);
if (d2phi != 0.0)
gsl_spmatrix_set(w->Ls, i, j, -d2phi);
if (phi != 0.0)
++nnz;
if (d2phi != 0.0)
++nnz_d2;
if (i != j)
{
if (rij > max_rij)
max_rij = rij;
if (rij < min_rij)
min_rij = rij;
}
}
}
fprintf(stderr, "done\n");
fprintf(stderr, "laplace3d_proc: min r_ij = %f\n", min_rij);
fprintf(stderr, "laplace3d_proc: max r_ij = %f\n", max_rij);
fprintf(stderr, "laplace3d_proc: c = %f\n", c);
fprintf(stderr, "laplace3d_proc: nnz(A) = %zu/%zu [%.1f%%]\n", nnz, N*N, (double)nnz / (double)(N*N) * 100.0);
fprintf(stderr, "laplace3d_proc: nnz(L) = %zu/%zu [%.1f%%]\n", nnz_d2, N*N, (double)nnz_d2 / (double)(N*N) * 100.0);
fprintf(stderr, "laplace3d_proc: nnz(As) = %zu/%zu [%.1f%%]\n", w->As->nz, N*N, (double)w->As->nz / (double)(N*N) * 100.0);
fprintf(stderr, "laplace3d_proc: nnz(Ls) = %zu/%zu [%.1f%%]\n", w->Ls->nz, N*N, (double)w->Ls->nz / (double)(N*N) * 100.0);
/*XXXprintsym_octave(w->A, "A");
printsym_octave(w->L, "L");*/
#if 1 /* use sparse generalized eigensystem method */
fprintf(stderr, "laplace3d_proc: performing sparse generalized eigenvalue decomposition...");
status = slepc_eigen_gsymm(w->Ls, w->As, w->eval);
fprintf(stderr, "done (status = %d)\n", status);
#elif 1 /* use dense generalized eigensystem method */
fprintf(stderr, "laplace3d_proc: performing generalized eigenvalue decomposition...");
status = lapack_dsygvd(w->L, w->A, w->eval);
fprintf(stderr, "done (status = %d)\n", status);
if (status != 0)
{
fprintf(stderr, "laplace3d_proc: DSYGVD failed\n");
return GSL_FAILURE;
}
/*XXXprint_octave(w->L, "V");*/
/* save eigenvectors in w->Alpha */
gsl_matrix_memcpy(w->Alpha, w->L);
{
size_t i;
fprintf(stderr, "=== LAPACK eigenvalues ===\n");
for (i = 0; i < 10; ++i)
{
double ei = gsl_vector_get(w->eval, i);
fprintf(stderr, "eval(%zu) = %.12e\n", i, ei);
}
}
#else /* compute eigendecomposition of L A^{-1} */
{
gsl_permutation * perm = gsl_permutation_alloc(N);
int signum, eval_found;
fprintf(stderr, "laplace3d_proc: computing LU decomposition of A...");
gettimeofday(&tv0, NULL);
gsl_matrix_transpose_tricpy(CblasLower, CblasUnit, w->A, w->A);
status = gsl_linalg_LU_decomp(w->A, perm, &signum);
gettimeofday(&tv1, NULL);
fprintf(stderr, "done (status = %d, %g seconds)\n", status, time_diff(tv0, tv1));
fprintf(stderr, "laplace3d_proc: computing A^{-1} L...");
gettimeofday(&tv0, NULL);
gsl_matrix_transpose_tricpy(CblasLower, CblasUnit, w->L, w->L);
/* apply permutation to columns of L := P L */
for (i = 0; i < N; ++i)
{
gsl_vector_view v = gsl_matrix_column(w->L, i);
gsl_permute_vector(perm, &v.vector);
}
gsl_blas_dtrsm(CblasLeft, CblasLower, CblasNoTrans, CblasUnit, 1.0, w->A, w->L); /* L := L^{-1} P L */
gsl_blas_dtrsm(CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, 1.0, w->A, w->L); /* L := U^{-1} L^{-1} P L */
gettimeofday(&tv1, NULL);
fprintf(stderr, "done (%g seconds)\n", time_diff(tv0, tv1));
fprintf(stderr, "laplace3d_proc: computing eigendecomposition of A^{-1} L...");
gettimeofday(&tv0, NULL);
status = lapack_eigen_symmv(w->L, w->eval, w->Alpha, &eval_found);
gettimeofday(&tv1, NULL);
fprintf(stderr, "done (status = %d, %g seconds)\n", status, time_diff(tv0, tv1));
gsl_permutation_free(perm);
}
#endif
/* sort eigenvalues and eigenvectors smallest to largest */
gsl_eigen_symmv_sort(w->eval, w->Alpha, GSL_EIGEN_SORT_VAL_ASC);
return 0;
}
}
/*
laplace3d_eval()
Evaluate an eigenmode at a given location,
u(v) = \sum_i \alpha_i \phi(||v - x_i||)
Inputs: mode - mode number, in [0,N-1]
v - Cartesian vector of location (X,Y,Z), size 3
X - N-by-3 matrix of RBF centers, in Cartesian components
result - (output) u(v)
w - workspace
Notes:
1) laplace3d_proc() must be called first to compute the matrix w->Alpha
*/
int
laplace3d_eval(const size_t mode, const gsl_vector * v,
const gsl_matrix * X, double * result, laplace3d_workspace * w)
{
const size_t N = w->N;
if (mode >= N)
{
GSL_ERROR ("invalid mode number", GSL_EDOM);
}
else if (v->size != 3)
{
GSL_ERROR ("v vector has wrong size", GSL_EBADLEN);
}
else if (X->size1 != N || X->size2 != 3)
{
GSL_ERROR ("X matrix has wrong size", GSL_EBADLEN);
}
else
{
gsl_vector_const_view alpha = gsl_matrix_const_column(w->Alpha, mode);
size_t i;
/* work_i = phi(||v - x_i||) */
for (i = 0; i < N; ++i)
{
gsl_vector_const_view Xi = gsl_matrix_const_row(X, i);
double r = compute_distance(v, &Xi.vector);
double phi = rbf_wendlandC4_phi(r, RBF_C);
gsl_vector_set(w->work, i, phi);
}
gsl_blas_ddot(w->work, &alpha.vector, result);
return GSL_SUCCESS;
}
}
/*****************************************
* INTERNAL ROUTINES *
*****************************************/
static double
compute_distance(const gsl_vector * a, const gsl_vector * b)
{
const size_t n = a->size;
size_t i;
double result = 0.0;
for (i = 0; i < n; ++i)
{
double ai = gsl_vector_get(a, i);
double bi = gsl_vector_get(b, i);
result = gsl_hypot(result, ai - bi);
}
return result;
}
/*
slepc_eigen_gsymm()
Solve sparse symmetric generalized eigensystem problem,
A x = lambda B x
Inputs: A - N-by-N symmetric matrix
B - N-by-N symmetric posdef matrix
eval - (output) eigenvalues
*/
static int
slepc_eigen_gsymm(const gsl_spmatrix * A, const gsl_spmatrix * B, gsl_vector * eval)
{
const size_t N = A->size1;
if (A->size2 != N)
{
GSL_ERROR ("A must be square", GSL_ENOTSQR);
}
else if (B->size1 != B->size2)
{
GSL_ERROR ("B must be square", GSL_ENOTSQR);
}
else if (B->size1 != N)
{
GSL_ERROR ("B must match dimensions of A", GSL_EBADLEN);
}
else if (eval->size != N)
{
GSL_ERROR ("eval has wrong dimensions", GSL_EBADLEN);
}
else
{
gsl_spmatrix * A_csr = gsl_spmatrix_compress(A, GSL_SPMATRIX_CSR);
gsl_spmatrix * B_csr = gsl_spmatrix_compress(B, GSL_SPMATRIX_CSR);
PetscInt NN = (PetscInt) N;
PetscInt * nnz = malloc(N * sizeof(PetscInt));
Mat AA, BB;
EPS eps; /* eigenproblem solver context */
size_t i;
/* compute nnz array for A */
for (i = 0; i < N; ++i)
nnz[i] = A_csr->p[i + 1] - A_csr->p[i];
MatCreate(PETSC_COMM_WORLD, &AA);
MatSetSizes(AA, PETSC_DECIDE, PETSC_DECIDE, NN, NN);
MatSetFromOptions(AA);
MatSeqAIJSetPreallocation(AA, 0, nnz);
MatSetUp(AA);
/* compute nnz array for B */
for (i = 0; i < N; ++i)
nnz[i] = B_csr->p[i + 1] - B_csr->p[i];
MatCreate(PETSC_COMM_WORLD, &BB);
MatSetSizes(BB, PETSC_DECIDE, PETSC_DECIDE, NN, NN);
MatSetFromOptions(BB);
MatSeqAIJSetPreallocation(BB, 0, nnz);
MatSetUp(BB);
for (i = 0; i < A->nz; ++i)
MatSetValue(AA, A->i[i], A->p[i], A->data[i], INSERT_VALUES);
for (i = 0; i < B->nz; ++i)
MatSetValue(BB, B->i[i], B->p[i], B->data[i], INSERT_VALUES);
MatAssemblyBegin(AA, MAT_FINAL_ASSEMBLY);
MatAssemblyEnd(AA, MAT_FINAL_ASSEMBLY);
MatAssemblyBegin(BB, MAT_FINAL_ASSEMBLY);
MatAssemblyEnd(BB, MAT_FINAL_ASSEMBLY);
/* create eigensolver and set options */
EPSCreate(PETSC_COMM_WORLD, &eps);
/* define generalized symmetric eigenproblem */
EPSSetOperators(eps, AA, BB);
EPSSetProblemType(eps, EPS_GHEP);
/* set solver parameters at runtime */
EPSSetFromOptions(eps);
/* solve eigensystem */
EPSSolve(eps);
/* output some info */
{
EPSType type;
PetscInt nev;
EPSGetType(eps, &type);
PetscPrintf(PETSC_COMM_WORLD," Solution method: %s\n\n",type);
EPSGetDimensions(eps, &nev, NULL, NULL);
PetscPrintf(PETSC_COMM_WORLD," Number of requested eigenvalues: %" PetscInt_FMT "\n",nev);
PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
EPSConvergedReasonView(eps,PETSC_VIEWER_STDOUT_WORLD);
EPSErrorView(eps,EPS_ERROR_RELATIVE,PETSC_VIEWER_STDOUT_WORLD);
PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
}
free(nnz);
gsl_spmatrix_free(A_csr);
gsl_spmatrix_free(B_csr);
EPSDestroy(&eps);
MatDestroy(&AA);
MatDestroy(&BB);
return GSL_SUCCESS;
}
}
static double
rbf_wendlandC4_phi(const double r, const double c)
{
if (c <= r)
{
return (0.0);
}
else
{
const double ratio = r / c;
const double t1 = gsl_pow_int(1.0 - ratio, 6);
const double t2 = 3.0 + 18.0 * ratio + 35.0 * ratio * ratio;
return (t1 * t2);
}
}
static double
rbf_wendlandC4_laplacian(const double r, const double c)
{
if (c <= r)
{
return (0.0);
}
else
{
const double t1 = -168.0 / gsl_pow_int(c, 8);
const double t2 = gsl_pow_int(c - r, 4);
const double t3 = c*c + 4.0*c*r - 15.0*r*r;
return (t1 * t2 * t3);
}
}
/*
lapack_dsygvd()
Solve generalized symmetric eigenvalue problem:
A x = lambda B x
where A,B are symmetric, and B is posdef
Inputs: A - on input, N-by-N matrix in lower triangle
on output, full matrix of eigenvectors
B - on input, N-by-N matrix in lower triangle
on output, upper triangle contains Cholesky factor, lower triangle unchanged
eval - (output) eigenvalues
*/
static int
lapack_dsygvd(gsl_matrix * A, gsl_matrix * B, gsl_vector * eval)
{
if (A->size1 != A->size2)
{
GSL_ERROR ("A matrix must be square", GSL_ENOTSQR);
}
else if (B->size1 != B->size2)
{
GSL_ERROR ("B matrix must be square", GSL_ENOTSQR);
}
else if (A->size1 != B->size1)
{
GSL_ERROR ("A and B matrices must have same dimensions", GSL_EBADLEN);
}
else if (A->size1 != eval->size)
{
GSL_ERROR ("eval vector has wrong size", GSL_EBADLEN);
}
else
{
lapack_int N = (lapack_int) A->size1;
lapack_int itype = 1;
lapack_int info;
gsl_vector * diag = gsl_vector_alloc(B->size1);
gsl_vector_view d = gsl_matrix_diagonal(B);
/* save diagonal of B */
gsl_vector_memcpy(diag, &d.vector);
/* copy lower triangles to upper for column major ordering */
gsl_matrix_transpose_tricpy(CblasLower, CblasUnit, A, A);
gsl_matrix_transpose_tricpy(CblasLower, CblasUnit, B, B);
info = LAPACKE_dsygvd(LAPACK_COL_MAJOR,
itype,
'V',
'L',
N,
A->data,
N,
B->data,
N,
eval->data);
gsl_matrix_transpose(A);
/* restore diagonal of B */
gsl_vector_memcpy(&d.vector, diag);
gsl_vector_free(diag);
return info;
}
}
/*
* le_laplace3d.h
*/
#ifndef INCLUDED_le_laplace3d_h
#define INCLUDED_le_laplace3d_h
#include <gsl/gsl_math.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_spmatrix.h>
typedef struct
{
size_t dim[3]; /* grid dimensions */
size_t N; /* total grid size */
gsl_spmatrix * As; /* phi(||xi - xj||) */
gsl_spmatrix * Ls; /* L phi(||xi - xj||) */
gsl_matrix * A; /* phi(||xi - xj||) */
gsl_matrix * L; /* L phi(||xi - xj||) */
gsl_matrix * Alpha; /* eigenmode coefficients, N-by-N */
gsl_vector * eval; /* eigenvalues, size N */
gsl_vector * work; /* workspace, size N */
} laplace3d_workspace;
/*
* Prototypes
*/
laplace3d_workspace * laplace3d_alloc(const size_t dim[]);
void laplace3d_free(laplace3d_workspace * w);
int laplace3d_proc(const gsl_matrix * X, laplace3d_workspace * w);
int laplace3d_eval(const size_t mode, const gsl_vector * v,
const gsl_matrix * X, double * result, laplace3d_workspace * w);
#endif /* INCLUDED_le_laplace3d_h */
default: mwe
include ${SLEPC_DIR}/lib/slepc/conf/slepc_common
lib_dir = /home/palken/usr/lib
common_libs = -L${lib_dir} ${lib_dir}/libgsl.a -lm ${lib_dir}/liblapacke.a
${lib_dir}/liblapack.a
OBJECTS = mwe.o laplace3d.o
mwe: ${OBJECTS}
${CLINKER} -o mwe ${OBJECTS} ${SLEPC_EPS_LIB} ${common_libs}
/*
* main.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <slepceps.h>
#include "le_laplace3d.h"
static char help[] = "Laplacian Eigenfunctions";
#define CIDX3(i,Ni,j,Nj,k,Nk) ((k) + (Nk)*((j) + (Nj)*(i)))
static double
time_diff(struct timeval a, struct timeval b)
{
double sec1, sec2;
sec1 = (double) a.tv_sec + a.tv_usec * 1.0e-6;
sec2 = (double) b.tv_sec + b.tv_usec * 1.0e-6;
return sec2 - sec1;
}
/*
spherical_grid()
Generate a regular spherical grid
Inputs: dim - dimensions (nr,ntheta,nphi)
a - minimum radius
b - maximum radius
X - (output) N-by-3 matrix with Cartesian vector
locations of grid nodes
*/
int
spherical_grid(const size_t dim[], const double a, const double b, gsl_matrix * X)
{
const size_t N = dim[0] * dim[1] * dim[2];
if (X->size1 != N || X->size2 != 3)
{
GSL_ERROR ("X matrix has wrong dimensions", GSL_EBADLEN);
}
else
{
const double dr = (b - a) / (dim[0] - 1.0);
const double dtheta = M_PI / (double) dim[1];
const double dphi = 2.0 * M_PI / (double) dim[2];
const double theta_min = 0.5 * dtheta;
size_t i, j, k;
/*
* set:
* theta_min = dtheta / 2
* theta_max = pi - dtheta / 2
* to exclude the poles
*
* phi_min = 0
* phi_max = 2pi - dphi
*/
for (i = 0; i < dim[0]; ++i)
{
double r = a + i * dr;
for (j = 0; j < dim[1]; ++j)
{
double theta = theta_min + j * dtheta;
double st = sin(theta);
double ct = cos(theta);
for (k = 0; k < dim[2]; ++k)
{
size_t idx = CIDX3(i, dim[0], j, dim[1], k, dim[2]);
double phi = k * dphi;
double sp = sin(phi);
double cp = cos(phi);
gsl_matrix_set(X, idx, 0, r * st * cp);
gsl_matrix_set(X, idx, 1, r * st * sp);
gsl_matrix_set(X, idx, 2, r * ct);
}
}
}
return GSL_SUCCESS;
}
}
int
main(int argc, char * argv[])
{
char mode_dir[20]; /* output directory for modes */
int nmodes = 10; /* number of modes to print */
double rmin = 1.0;
double rmax = 5.0;
size_t dim[3];
size_t N;
gsl_matrix * X;
struct timeval tv0, tv1;
laplace3d_workspace * w;
SlepcInitialize(&argc, &argv, (char *) 0, help);
strcpy(mode_dir, "modes");
PetscOptionsGetInt(NULL, NULL, "-nmodes", &nmodes, NULL);
PetscOptionsGetString(NULL, NULL, "-mode_dir", mode_dir, sizeof(mode_dir), NULL);
dim[0] = 5; /* nr */
dim[1] = 18; /* ntheta */
dim[2] = 36; /* nphi */
N = dim[0] * dim[1] * dim[2];
X = gsl_matrix_alloc(N, 3);
fprintf(stderr, "main: generating spherical grid (size %zu)...", N);
gettimeofday(&tv0, NULL);
spherical_grid(dim, rmin, rmax, X);
gettimeofday(&tv1, NULL);
fprintf(stderr, "done (%g seconds)\n", time_diff(tv0, tv1));
w = laplace3d_alloc(dim);
laplace3d_proc(X, w);
laplace3d_free(w);
gsl_matrix_free(X);
SlepcFinalize();
return 0;
}