Summary:
+ I am writing an R extension that needs to call dmvnorm more than
10,000 times during a model fitting computation.
+ My extension uses openmp for parallel execution.
+ As of R 3.0, it is no longer permitted for threads to call the R
interpreter because there is a stack overflow check that always trips
because the thread's stack is different from what R is expecting.
+ Therefore, I need a C version of dmvnorm.
+ The mvtnorm package maintainer is not interested in
R_RegisterCCallable.
Can a C version of dmvnorm get added to the R core?
--
Joshua N. Pritikin
Department of Psychology
University of Virginia
485 McCormick Rd, Gilmer Hall Room 102
Charlottesville, VA 22904
http://people.virginia.edu/~jnp3bc
--- Begin Message ---
On Mon, 3 Jun 2013, Joshua N Pritikin wrote:
On Mon, Jun 03, 2013 at 03:24:54PM +0200, Torsten Hothorn wrote:
On Sat, 1 Jun 2013, Joshua N Pritikin wrote:
On Fri, May 31, 2013 at 07:22:52PM +0200, Torsten Hothorn wrote:
- what exactly is the advantage of using C? How much time / memory can be
saved?
I bet the difference will only appear in micro-benchmarks. There is
probably no real world impact.
As I said, the main reason I wrote this is because I need to call
dmvnorm from C.
I see -- so now that you have the C code ready, why do we need it to
be part of mvtnorm?
I want to share. What if somebody else has the same need?
sure, but interfacing C code from other packages is always troublesome (at
least this is my experience). I can't see any advantage for mvtnorm, so I
think one should put the code somewhere else.
Should I submit it to the R core?
I suggest that, yes. Maybe R core is keen to add the multivariate normal
density to R's C API. This would be the easiest way to make your code
accessible.
Best,
Torsten
--
Joshua N. Pritikin
Department of Psychology
University of Virginia
485 McCormick Rd, Gilmer Hall Room 102
Charlottesville, VA 22904
http://people.virginia.edu/~jnp3bc
--- End Message ---
diff --git a/R/mvnorm.R b/R/mvnorm.R
index 4bb7b9e..fcf962b 100644
--- a/R/mvnorm.R
+++ b/R/mvnorm.R
@@ -66,11 +66,7 @@ dmvnorm <- function (x, mean, sigma, log=FALSE)
if (length(mean) != NROW(sigma)) {
stop("mean and sigma have non-conforming size")
}
- distval <- mahalanobis(x, center = mean, cov = sigma)
- logdet <- sum(log(eigen(sigma, symmetric=TRUE,
- only.values=TRUE)$values))
- logretval <- -(ncol(x)*log(2*pi) + logdet + distval)/2
+ logretval <- apply(x, 1, function(loc) .Call("dmvnorm_wrapper", loc, mean, sigma))
if(log) return(logretval)
exp(logretval)
}
-
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..5722e48
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1 @@
+PKG_LIBS = ${LAPACK_LIBS} $(BLAS_LIBS) ${FLIBS}
diff --git a/src/dmvnorm.c b/src/dmvnorm.c
new file mode 100644
index 0000000..074975f
--- /dev/null
+++ b/src/dmvnorm.c
@@ -0,0 +1,166 @@
+#include <R.h>
+#include <Rmath.h>
+#include <Rinternals.h>
+#include <R_ext/BLAS.h>
+#include <R_ext/Lapack.h>
+
+static const int ERROR_LEN = 80;
+
+static double
+_mahalanobis(char *err, int dim, double *loc, double *center, double *origCov)
+{
+ double *half = NULL;
+ double *cloc = Realloc(NULL, dim, double);
+ for (int dx=0; dx < dim; dx++) {
+ cloc[dx] = loc[dx] - center[dx];
+ }
+
+ double *cov = Realloc(NULL, dim * dim, double);
+ memcpy(cov, origCov, sizeof(double) * dim * dim);
+
+ double *icov = Realloc(NULL, dim * dim, double);
+ for (int rx=0; rx < dim; rx++) {
+ for (int cx=0; cx < dim; cx++) {
+ icov[rx * dim + cx] = rx==cx? 1 : 0;
+ }
+ }
+
+ int *ipiv = Realloc(NULL, dim, int);
+ int info;
+ F77_CALL(dgesv)(&dim, &dim, cov, &dim, ipiv, icov, &dim, &info);
+ if (info < 0) {
+ snprintf(err, ERROR_LEN, "Arg %d is invalid", -info);
+ goto errout;
+ }
+ if (info > 0) {
+ snprintf(err, ERROR_LEN, "Sigma is singular and cannot be inverted");
+ goto errout;
+ }
+
+ half = Realloc(NULL, dim, double);
+ char trans='n';
+ double alpha=1;
+ double beta=0;
+ int inc=1;
+ F77_CALL(dgemv)(&trans, &dim, &dim, &alpha, icov, &dim, cloc, &inc, &beta, half, &inc);
+
+ double got=0;
+ for (int dx=0; dx < dim; dx++) got += half[dx] * cloc[dx];
+
+ errout:
+ Free(half);
+ Free(cloc);
+ Free(cov);
+ Free(icov);
+ Free(ipiv);
+ return got;
+}
+
+static double
+mahalanobis(int dim, double *loc, double *center, double *origCov)
+{
+ char err[ERROR_LEN];
+ err[0] = 0;
+ double ret = _mahalanobis(err, dim, loc, center, origCov);
+ if (err[0]) error("%s", err);
+ return ret;
+}
+
+SEXP mahalanobis_wrapper(SEXP Rloc, SEXP Rcenter, SEXP Rcov)
+{
+ SEXP ret;
+ PROTECT(ret = allocVector(REALSXP, 1));
+ REAL(ret)[0] = mahalanobis(length(Rloc), REAL(Rloc), REAL(Rcenter), REAL(Rcov));
+ UNPROTECT(1);
+ return ret;
+}
+
+static double
+_dmvnorm(char *err, int dim, double *loc, double *mean, double *origSigma)
+{
+ double dist = mahalanobis(dim, loc, mean, origSigma);
+
+ double *sigma = Realloc(NULL, dim * dim, double);
+ memcpy(sigma, origSigma, sizeof(double) * dim * dim);
+ double *work = NULL;
+ int *iwork = NULL;
+
+ char jobz = 'N';
+ char range = 'A';
+ char uplo = 'U';
+ double vunused;
+ int iunused;
+ double abstol = 0;
+ int m;
+ double w[dim];
+ double Z[dim];
+ int ldz=1;
+ int isuppz[2*dim];
+ int lwork = -1;
+ double optlWork;
+ int optliWork;
+ int liwork = -1;
+ int info;
+
+ F77_CALL(dsyevr)(&jobz, &range, &uplo,
+ &dim, sigma, &dim,
+ &vunused, &vunused,
+ &iunused, &iunused,
+ &abstol, &m, w,
+ Z, &ldz, isuppz,
+ &optlWork, &lwork,
+ &optliWork, &liwork, &info);
+ if (info != 0) {
+ snprintf(err, ERROR_LEN, "dsyevr failed when requesting work space size");
+ goto errout;
+ }
+
+ lwork = optlWork;
+ work = Realloc(NULL, lwork, double);
+ liwork = optliWork;
+ iwork = Realloc(NULL, liwork, int);
+
+ F77_CALL(dsyevr)(&jobz, &range, &uplo, &dim, sigma, &dim,
+ &vunused, &vunused, &iunused, &iunused, &abstol, &m, w, Z, &ldz, isuppz,
+ work, &lwork, iwork, &liwork, &info);
+ if (info < 0) {
+ snprintf(err, ERROR_LEN, "Arg %d is invalid", -info);
+ goto errout;
+ }
+ if (info > 0) {
+ snprintf(err, ERROR_LEN, "dsyevr: internal error");
+ goto errout;
+ }
+ if (m < dim) {
+ snprintf(err, ERROR_LEN, "Sigma not of full rank");
+ goto errout;
+ }
+
+ for (int dx=0; dx < dim; dx++) dist += log(w[dx]);
+ double got = -(dim * M_LN_SQRT_2PI*2 + dist)/2;
+
+ errout:
+ Free(sigma);
+ Free(work);
+ Free(iwork);
+ return got;
+}
+
+static double
+dmvnorm(int dim, double *loc, double *mean, double *sigma)
+{
+ char err[ERROR_LEN];
+ err[0] = 0;
+ double ret = _dmvnorm(err, dim, loc, mean, sigma);
+ if (err[0]) error("%s", err);
+ return ret;
+}
+
+SEXP dmvnorm_wrapper(SEXP Rloc, SEXP Rmean, SEXP Rsigma)
+{
+ SEXP ret;
+ PROTECT(ret = allocVector(REALSXP, 1));
+ REAL(ret)[0] = dmvnorm(length(Rloc), REAL(Rloc), REAL(Rmean), REAL(Rsigma));
+ UNPROTECT(1);
+ return ret;
+}
diff --git a/tests/dmvnorm.R b/tests/dmvnorm.R
new file mode 100644
index 0000000..3fcc463
--- /dev/null
+++ b/tests/dmvnorm.R
@@ -0,0 +1,79 @@
+#options(error = utils::recover)
+library(mvtnorm)
+
+#set.seed(1)
+
+genOrthogonal<-function(dim) {
+ Q<-MOrthogonal(runif(dim))
+ return(Q)
+}
+
+# Construct an orthogonal matrix whose first few columns are standardized 'M'
+# where columns of 'M' are orthogonal.
+# Here "standardized 'M'" means each its columns has length 1.
+MOrthogonal<-function(M)
+{
+ # can set the parameter "tol" of "qr" to decide how small value should be 0
+ tmp<-qr(M)
+ Q<-qr.Q(tmp,complete=TRUE)
+ if(is.vector(M)) { if(Q[1]*M[1]<0) Q<- -Q }
+ else { if(Q[1,1]*M[1,1]<0) Q<- - Q }
+ return(Q)
+}
+
+# adapted from clusterGeneration 1.3.1 by Weiliang Qiu, Harry Joe
+genPositiveDefMat <- function(dim, low=-1.4, upp=1.4) {
+ u<-matrix(0, dim,dim)
+ egvalues <- exp(runif(dim,min=low,max=upp))
+ diag(u)<-egvalues #the diagonal elements of u are positive
+ Sigma<-u
+ if(dim>1)
+ { Q<-genOrthogonal(dim) # generate an orthogonal matrix
+ Sigma<-Q%*%u%*%t(Q) # the final positive definite matrix
+ }
+ Sigma
+}
+
+R.dmvnorm <- function (x, mean, sigma, log=FALSE)
+{
+ if (is.vector(x)) {
+ x <- matrix(x, ncol = length(x))
+ }
+ if (missing(mean)) {
+ mean <- rep(0, length = ncol(x))
+ }
+ if (missing(sigma)) {
+ sigma <- diag(ncol(x))
+ }
+ if (NCOL(x) != NCOL(sigma)) {
+ stop("x and sigma have non-conforming size")
+ }
+ if (!isSymmetric(sigma, tol = sqrt(.Machine$double.eps),
+ check.attributes = FALSE)) {
+ stop("sigma must be a symmetric matrix")
+ }
+ if (length(mean) != NROW(sigma)) {
+ stop("mean and sigma have non-conforming size")
+ }
+ distval <- mahalanobis(x, center = mean, cov = sigma)
+ logdet <- sum(log(eigen(sigma, symmetric=TRUE,
+ only.values=TRUE)$values))
+ logretval <- -(ncol(x)*log(2*pi) + logdet + distval)/2
+ if(log) return(logretval)
+ exp(logretval)
+}
+
+for (dim in 1:3) {
+ sigma <- genPositiveDefMat(dim)
+ center <- rnorm(dim)
+ loc <- rnorm(dim)
+
+ stopifnot(all.equal(dmvnorm(loc, center, sigma, log=TRUE),
+ R.dmvnorm(loc, center, sigma, log=TRUE)))
+}
+
+sigma <- genPositiveDefMat(4)
+center <- rnorm(4)
+loc <- matrix(rnorm(4*8), nrow=8)
+stopifnot(all.equal(R.dmvnorm(loc, center, sigma),
+ dmvnorm(loc, center, sigma)))
______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel