Dave:

We are planning an intervention study for adolescent alcohol use, and I am planning to use simulations based on a hurdle model (using the hurdle() function in package pscl) for sample size estimation.

The simulation code and power code are below -- note that at the moment the "power" code is just returning the coefficients, as something isn't working quite right.

Hmmm, strange. I didn't see the problem either. Some parameters did not seem to be passed correctly but I did not see it.

But as I saw a few things that could be done more elegantly or in a way that they are easier to read (for me), I simply rewrote the code almost from scratch. For example, I now use plogis() for the inverse logit, rbinom() for drawing binomial random numbers, colMeans() for column means, and coeftest()/waldtest() for computing the p-values from the Wald tests.

The code is included below. Both the simulation of the coefficients and the simulation of associated power values seem to yield plausible results.

Hope that helps,
Z

dgp <- function(nobs = 1000, beta0 = 2.5, beta1 = -0.2,
  alpha0 = -0.9, alpha1 = 0.2, theta = 2.2)
{
  trt <- rep(0:1, length.out = nobs)
  pr <- plogis(alpha0 + alpha1 * trt)
  y1 <- rbinom(nobs, size = 1, prob = pr)
  mu <- exp(beta0 + beta1 * trt)
  y2 <- rnegbin(nobs, mu = mu, theta = theta)
  while(any(y2_0 <- y2 < 1L)) y2[y2_0] <-
    rnegbin(sum(y2_0), mu = mu[y2_0], theta = theta)
  data.frame(trt = trt, y = y1 * y2)
}

coefsim <- function(nrep = 100, ...) {
  nam <- c("beta0", "beta1", "alpha0", "alpha1", "theta")
  rval <- matrix(NA, nrow = nrep, ncol = length(nam))
  for(i in 1:nrep) {
    dat <- dgp(...)
    m <- hurdle(y ~ trt, data = dat, dist = "negbin")
    rval[i,] <- c(coef(m), m$theta)
  }
  colnames(rval) <- nam
  colMeans(rval)
}

powersim <- function(nrep = 100, level = 0.05, ...) {
  nam <- c("beta1", "alpha1", "both")
  rval <- matrix(NA, nrow = nrep, ncol = length(nam))
  for(i in 1:nrep) {
    dat <- dgp(...)
    m <- hurdle(y ~ trt, data = dat, dist = "negbin")
    m0 <- hurdle(y ~ 1, data = dat, dist = "negbin")
    rval[i,] <- c(coeftest(m)[c(2, 4), 4], waldtest(m0, m)[2, 4])
  }
  colnames(rval) <- nam
  colMeans(rval < level)
}


library("pscl")
library("lmtest")

set.seed(1)
out1 <- coefsim()
round(out1, digits = 3)

set.seed(1)
out2 <- powersim()
round(out2, digits = 3)


The average estimates from code below are:

count_(Intercept)         count_trt  zero_(Intercept)
     2.498327128      -0.000321315       0.910293501
        zero_trt
    -0.200134813

Three of the four look right (ie, converging to population values), but the count_trt is stuck at zero, regardless of sample size (when it should be ~ -0.20).

Does anyone see what's wrong?

Thanks for any input.

cheers, Dave



mysim <- function(n, beta0, beta1, alpha0, alpha1, theta){
        trt <- c(rep(0,n), rep(1,n))
        ### mean function logit model
        p0 <- exp(alpha0 + alpha1*trt)/(1 + exp(alpha0 + alpha1*trt))
        ### 0 / 1 based on p0
        y1 <- as.numeric(runif(n)>p0)
        ### mean function count portion
        mu <- exp(beta0 + beta1*trt)
        ### estimate counts using NB dist
        require(MASS, quietly = TRUE)
        y2 <- rnegbin(n, mu = mu, theta = theta)
        ### if y2 = 0, draw new value
        while(sum(y2==0)>0){
y2[which(y2==0)] <- rnegbin(length(which(y2==0)), mu=mu[which(y2==0)], theta = theta)
        }
        y<-y1*y2
        data.frame(trt=trt,y=y)
}
#alpha0, alpha1 is the parameter for zero part
#beta0,beta1 is the parameter for negative binomial
#theta is dispersion parameter for negative binomial, infinity correspond to poisson
#

#example power analysis
#return three power, power1 for zero part, power2 for negative binomial part
#power3 for joint test,significance level can be set, default is 0.05
#M is simulation time
#require pscl package
#library(pscl)

mypower <- function(n, beta0, beta1, alpha0, alpha1, theta, siglevel=0.05, M=1000){
        myfun <- function(n,beta0,beta1,alpha0,alpha1,theta,siglevel){
                data <- mysim(n,beta0,beta1,alpha0,alpha1,theta)
                require(pscl, quietly = TRUE)
res <- hurdle(y ~ trt, data = data, dist = "negbin", trace = FALSE)
                est <- coef(res)#[c(2,4)]
                #v<-res$vcov[c(2,4),c(2,4)]
                #power1<-as.numeric(2*pnorm(-abs(est)[2]/sqrt(v[2,2]))<siglevel)
                #power2<-as.numeric(2*pnorm(-abs(est)[1]/sqrt(v[1,1]))<siglevel)
                
#power3<-as.numeric((1-pchisq(t(est)%*%solve(v)%*%est,df=2))<siglevel)
                #c(power1,power2,power3)
        }
r <- replicate(M, myfun(n,beta0,beta1,alpha0,alpha1,theta,siglevel), simplify=TRUE)
        apply(r, 1, mean)
}

out <- mypower(n = 1000, beta0 = 2.5, beta1 = -0.20,
alpha0 = -0.90, alpha1 = 0.20,
                                                 theta = 2.2, M = 100)
out


--
Dave Atkins, PhD
Research Associate Professor
Department of Psychiatry and Behavioral Science
University of Washington
datk...@u.washington.edu

Center for the Study of Health and Risk Behaviors (CSHRB) 1100 NE 45th Street, Suite 300 Seattle, WA 98105 206-616-3879 http://depts.washington.edu/cshrb/ (Mon-Wed)
Center for Healthcare Improvement, for Addictions, Mental Illness,
 Medically Vulnerable Populations (CHAMMP)
325 9th Avenue, 2HH-15
Box 359911
Seattle, WA 98104
http://www.chammp.org
(Thurs)

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to