Summary:

+ I am writing an R extension that needs to call dmvnorm more than 
10,000 times during a model fitting computation.

+ My extension uses openmp for parallel execution.

+ As of R 3.0, it is no longer permitted for threads to call the R 
interpreter because there is a stack overflow check that always trips 
because the thread's stack is different from what R is expecting.

+ Therefore, I need a C version of dmvnorm.

+ The mvtnorm package maintainer is not interested in 
R_RegisterCCallable.

Can a C version of dmvnorm get added to the R core?

-- 
Joshua N. Pritikin
Department of Psychology
University of Virginia
485 McCormick Rd, Gilmer Hall Room 102
Charlottesville, VA 22904
http://people.virginia.edu/~jnp3bc
--- Begin Message ---

On Mon, 3 Jun 2013, Joshua N Pritikin wrote:

On Mon, Jun 03, 2013 at 03:24:54PM +0200, Torsten Hothorn wrote:
On Sat, 1 Jun 2013, Joshua N Pritikin wrote:
On Fri, May 31, 2013 at 07:22:52PM +0200, Torsten Hothorn wrote:
- what exactly is the advantage of using C? How much time / memory can be
 saved?

I bet the difference will only appear in micro-benchmarks. There is
probably no real world impact.

As I said, the main reason I wrote this is because I need to call
dmvnorm from C.

I see -- so now that you have the C code ready, why do we need it to
be part of mvtnorm?

I want to share. What if somebody else has the same need?

sure, but interfacing C code from other packages is always troublesome (at least this is my experience). I can't see any advantage for mvtnorm, so I think one should put the code somewhere else.


Should I submit it to the R core?


I suggest that, yes. Maybe R core is keen to add the multivariate normal density to R's C API. This would be the easiest way to make your code accessible.

Best,

Torsten

--
Joshua N. Pritikin
Department of Psychology
University of Virginia
485 McCormick Rd, Gilmer Hall Room 102
Charlottesville, VA 22904
http://people.virginia.edu/~jnp3bc


--- End Message ---
diff --git a/R/mvnorm.R b/R/mvnorm.R
index 4bb7b9e..fcf962b 100644
--- a/R/mvnorm.R
+++ b/R/mvnorm.R
@@ -66,11 +66,7 @@ dmvnorm <- function (x, mean, sigma, log=FALSE)
     if (length(mean) != NROW(sigma)) {
         stop("mean and sigma have non-conforming size")
     }
-    distval <- mahalanobis(x, center = mean, cov = sigma)
-    logdet <- sum(log(eigen(sigma, symmetric=TRUE,
-                                   only.values=TRUE)$values))
-    logretval <- -(ncol(x)*log(2*pi) + logdet + distval)/2
+    logretval <- apply(x, 1, function(loc) .Call("dmvnorm_wrapper", loc, mean, sigma))
     if(log) return(logretval)
     exp(logretval)
 }
-  
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..5722e48
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1 @@
+PKG_LIBS = ${LAPACK_LIBS} $(BLAS_LIBS) ${FLIBS}
diff --git a/src/dmvnorm.c b/src/dmvnorm.c
new file mode 100644
index 0000000..074975f
--- /dev/null
+++ b/src/dmvnorm.c
@@ -0,0 +1,166 @@
+#include <R.h>
+#include <Rmath.h>
+#include <Rinternals.h>
+#include <R_ext/BLAS.h>
+#include <R_ext/Lapack.h>
+
+static const int ERROR_LEN = 80;
+
+static double
+_mahalanobis(char *err, int dim, double *loc, double *center, double *origCov)
+{
+  double *half = NULL;
+  double *cloc = Realloc(NULL, dim, double);
+  for (int dx=0; dx < dim; dx++) {
+    cloc[dx] = loc[dx] - center[dx];
+  }
+
+  double *cov = Realloc(NULL, dim * dim, double);
+  memcpy(cov, origCov, sizeof(double) * dim * dim);
+
+  double *icov = Realloc(NULL, dim * dim, double);
+  for (int rx=0; rx < dim; rx++) {
+    for (int cx=0; cx < dim; cx++) {
+      icov[rx * dim + cx] = rx==cx? 1 : 0;
+    }
+  }
+  
+  int *ipiv = Realloc(NULL, dim, int);
+  int info;
+  F77_CALL(dgesv)(&dim, &dim, cov, &dim, ipiv, icov, &dim, &info);
+  if (info < 0) {
+    snprintf(err, ERROR_LEN, "Arg %d is invalid", -info);
+    goto errout;
+  }
+  if (info > 0) {
+    snprintf(err, ERROR_LEN, "Sigma is singular and cannot be inverted");
+    goto errout;
+  }
+
+  half = Realloc(NULL, dim, double);
+  char trans='n';
+  double alpha=1;
+  double beta=0;
+  int inc=1;
+  F77_CALL(dgemv)(&trans, &dim, &dim, &alpha, icov, &dim, cloc, &inc, &beta, half, &inc);
+
+  double got=0;
+  for (int dx=0; dx < dim; dx++) got += half[dx] * cloc[dx];
+
+ errout:
+  Free(half);
+  Free(cloc);
+  Free(cov);
+  Free(icov);
+  Free(ipiv);
+  return got;
+}
+
+static double
+mahalanobis(int dim, double *loc, double *center, double *origCov)
+{
+  char err[ERROR_LEN];
+  err[0] = 0;
+  double ret = _mahalanobis(err, dim, loc, center, origCov);
+  if (err[0]) error("%s", err);
+  return ret;
+}
+
+SEXP mahalanobis_wrapper(SEXP Rloc, SEXP Rcenter, SEXP Rcov)
+{
+  SEXP ret;
+  PROTECT(ret = allocVector(REALSXP, 1));
+  REAL(ret)[0] = mahalanobis(length(Rloc), REAL(Rloc), REAL(Rcenter), REAL(Rcov));
+  UNPROTECT(1);
+  return ret;
+}
+
+static double
+_dmvnorm(char *err, int dim, double *loc, double *mean, double *origSigma)
+{
+  double dist = mahalanobis(dim, loc, mean, origSigma);
+
+  double *sigma = Realloc(NULL, dim * dim, double);
+  memcpy(sigma, origSigma, sizeof(double) * dim * dim);
+  double *work = NULL;
+  int *iwork = NULL;
+
+  char jobz = 'N';
+  char range = 'A';
+  char uplo = 'U';
+  double vunused;
+  int iunused;
+  double abstol = 0;
+  int m;
+  double w[dim];
+  double Z[dim];
+  int ldz=1;
+  int isuppz[2*dim];
+  int lwork = -1;
+  double optlWork;
+  int optliWork;
+  int liwork = -1;
+  int info;
+
+  F77_CALL(dsyevr)(&jobz, &range, &uplo,
+		   &dim, sigma, &dim,
+		   &vunused, &vunused,
+		   &iunused, &iunused,
+		   &abstol, &m, w,
+		   Z, &ldz, isuppz,
+		   &optlWork, &lwork,
+		   &optliWork, &liwork, &info);
+  if (info != 0) {
+    snprintf(err, ERROR_LEN, "dsyevr failed when requesting work space size");
+    goto errout;
+  }
+
+  lwork = optlWork;
+  work = Realloc(NULL, lwork, double);
+  liwork = optliWork;
+  iwork = Realloc(NULL, liwork, int);
+
+  F77_CALL(dsyevr)(&jobz, &range, &uplo, &dim, sigma, &dim,
+		   &vunused, &vunused, &iunused, &iunused, &abstol, &m, w, Z, &ldz, isuppz,
+		   work, &lwork, iwork, &liwork, &info);
+  if (info < 0) {
+    snprintf(err, ERROR_LEN, "Arg %d is invalid", -info);
+    goto errout;
+  }
+  if (info > 0) {
+    snprintf(err, ERROR_LEN, "dsyevr: internal error");
+    goto errout;
+  }
+  if (m < dim) {
+    snprintf(err, ERROR_LEN, "Sigma not of full rank");
+    goto errout;
+  }
+
+  for (int dx=0; dx < dim; dx++) dist += log(w[dx]);
+  double got = -(dim * M_LN_SQRT_2PI*2 + dist)/2;
+
+ errout:
+  Free(sigma);
+  Free(work);
+  Free(iwork);
+  return got;
+}
+
+static double
+dmvnorm(int dim, double *loc, double *mean, double *sigma)
+{
+  char err[ERROR_LEN];
+  err[0] = 0;
+  double ret = _dmvnorm(err, dim, loc, mean, sigma);
+  if (err[0]) error("%s", err);
+  return ret;
+}
+
+SEXP dmvnorm_wrapper(SEXP Rloc, SEXP Rmean, SEXP Rsigma)
+{
+  SEXP ret;
+  PROTECT(ret = allocVector(REALSXP, 1));
+  REAL(ret)[0] = dmvnorm(length(Rloc), REAL(Rloc), REAL(Rmean), REAL(Rsigma));
+  UNPROTECT(1);
+  return ret;
+}
diff --git a/tests/dmvnorm.R b/tests/dmvnorm.R
new file mode 100644
index 0000000..3fcc463
--- /dev/null
+++ b/tests/dmvnorm.R
@@ -0,0 +1,79 @@
+#options(error = utils::recover)
+library(mvtnorm)
+
+#set.seed(1)
+
+genOrthogonal<-function(dim) { 
+  Q<-MOrthogonal(runif(dim))
+  return(Q)
+}
+
+# Construct an orthogonal matrix whose first few columns are standardized 'M'
+# where columns of 'M' are orthogonal.
+# Here "standardized 'M'" means each its columns has length 1.
+MOrthogonal<-function(M)
+{
+  # can set the parameter "tol" of "qr" to decide how small value should be 0
+  tmp<-qr(M)
+  Q<-qr.Q(tmp,complete=TRUE)
+  if(is.vector(M)) { if(Q[1]*M[1]<0) Q<- -Q }
+  else { if(Q[1,1]*M[1,1]<0) Q<- - Q }
+  return(Q)
+}
+
+# adapted from clusterGeneration 1.3.1 by Weiliang Qiu, Harry Joe
+genPositiveDefMat <- function(dim, low=-1.4, upp=1.4) {
+  u<-matrix(0, dim,dim)
+  egvalues <- exp(runif(dim,min=low,max=upp))
+  diag(u)<-egvalues #the diagonal elements of u are positive
+  Sigma<-u
+  if(dim>1)
+  { Q<-genOrthogonal(dim) # generate an orthogonal matrix 
+    Sigma<-Q%*%u%*%t(Q) # the final positive definite matrix
+  }
+  Sigma
+}
+
+R.dmvnorm <- function (x, mean, sigma, log=FALSE)
+{
+    if (is.vector(x)) {
+        x <- matrix(x, ncol = length(x))
+    }
+    if (missing(mean)) {
+        mean <- rep(0, length = ncol(x))
+    }
+    if (missing(sigma)) {
+        sigma <- diag(ncol(x))
+    }
+    if (NCOL(x) != NCOL(sigma)) {
+        stop("x and sigma have non-conforming size")
+    }
+    if (!isSymmetric(sigma, tol = sqrt(.Machine$double.eps), 
+                     check.attributes = FALSE)) {
+        stop("sigma must be a symmetric matrix")
+    }
+    if (length(mean) != NROW(sigma)) {
+        stop("mean and sigma have non-conforming size")
+    }
+    distval <- mahalanobis(x, center = mean, cov = sigma)
+    logdet <- sum(log(eigen(sigma, symmetric=TRUE,
+                                   only.values=TRUE)$values))
+    logretval <- -(ncol(x)*log(2*pi) + logdet + distval)/2
+    if(log) return(logretval)
+    exp(logretval)
+}
+  
+for (dim in 1:3) {
+  sigma <- genPositiveDefMat(dim)
+  center <- rnorm(dim)
+  loc <- rnorm(dim)
+
+  stopifnot(all.equal(dmvnorm(loc, center, sigma, log=TRUE),
+                      R.dmvnorm(loc, center, sigma, log=TRUE)))
+}
+
+sigma <- genPositiveDefMat(4)
+center <- rnorm(4)
+loc <- matrix(rnorm(4*8), nrow=8)
+stopifnot(all.equal(R.dmvnorm(loc, center, sigma),
+                    dmvnorm(loc, center, sigma)))
______________________________________________
R-devel@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-devel

Reply via email to