# Dear R-users,
#
# I'm having a small problem while bootstrapping data. What I would like to do
# is resample the data and calculate a function on each resample, so that I can
# estimate a measure of reproducibility for this data. The function I wrote
# works fine, even while bootstrapping. The only problem is the bootstrapping
# itself.
#
# The dataset consists of 10 trials, each divided into 3 groups: high (3),
# medium (2) and low (1). A bootstrap sample (trial) should always consist of
# 5 observations taken from each group population, so as to be representative.
#
# Example:
#   original data:
#     trial 1 : group(1) = (0,0,1,0,0); group(2) = (0,1,1,0,1); group(3) = (1,1,1,1,1) ...
#   bootstrapped data:
#     trial 1 : group(1) = (0,0,0,0,1); group(2) = (1,1,0,0,1); group(3) = (1,0,1,1,1)
#   NOT bootstrapped data:
#     trial 1 : group(1) = (0,0,0,0,1,1,0); group(2) = (1,0,1); group(3) = (1,0,1,1,1,1,1,0,1,1)
#
# I am familiar with the function "bootstrap" (pkg "bootstrap"), but I read
# about a function called "boot" (pkg "boot"); however, I can't seem to master
# it. The explanation (help('boot')) isn't making me any wiser. I know I can
# always split the data up (which is what I am doing), but I was wondering
# whether this would have an effect on the bootstrap; maybe it is better to
# keep all the groups together?
#
# Here is a (this time WORKING) code example of what I did.

## proc

## generate data
datas <- data.frame(
  "protection" = c(
    rep(c(0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1), 2),
    c(0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1),
    rep(c(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1), 2),
    c(0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1),
    rep(c(0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1), 2),
    c(0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1),
    c(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1)
  ),
  "group" = rep(1:3, 50),
  "trial" = rep(as.numeric(1:10), each = 15)
)

## describe Function

#' Per-cell and overall agreement summaries for a 0/1 outcome.
#'
#' Every combination of a unique trial and a unique group is one "cell". For
#' each cell the observations equal to 1 and equal to 0 are counted; any other
#' value is ignored by both counts.
#'
#' @param dataset1 Vector of outcomes coded 0/1.
#' @param trialdata Vector of trial labels, same length as `dataset1`.
#' @param groupdata Vector of group labels, same length as `dataset1`.
#' @return A list with elements
#'   * `all`: data frame with one row per cell (row names `"<trial>_<group>"`,
#'     ordered group by group) and columns `Trial`, `Group`, `Vacc`
#'     (`p0^2 + p1^2`), `p0`, `p1`, `n0`, `n1`, `n`. NOTE: `n0`/`p0` refer to
#'     observations equal to 1 and `n1`/`p1` to observations equal to 0.
#'   * `N`: number of unique groups.
#'   * `P0`, `P1`: per-group sum of the cell proportions divided by the number
#'     of unique trials.
#'   * `Vcon`: `100 * mean(P0^2 + P1^2)` over groups.
#'   * `Vacc.total`: 100 times the mean over groups of the per-group mean `Vacc`.
#'   A cell without any 0/1 observation has `n = 0`; its proportions are `NaN`
#'   and propagate into the summaries.
Vacc.Vcon <- function(dataset1, trialdata, groupdata) {
  groups <- as.vector(unique(groupdata))
  trials <- unique(trialdata)
  Tr <- length(trials)
  G <- length(groups)

  # Enumerate the cells group by group, cycling through the trials inside
  # each group.
  cell_trial <- rep(trials, G)
  cell_group <- rep(groups, each = Tr)

  # Number of observations in one cell whose outcome equals `value`.
  count_in_cell <- function(value, trialn, groupn) {
    as.numeric(
      sum(dataset1 == value & trialdata == trialn & groupdata == groupn)
    )
  }

  # Each cell is evaluated exactly once.
  cells <- seq_along(cell_trial)
  n0 <- vapply(
    cells,
    function(i) count_in_cell(1, cell_trial[i], cell_group[i]),
    numeric(1)
  )
  n1 <- vapply(
    cells,
    function(i) count_in_cell(0, cell_trial[i], cell_group[i]),
    numeric(1)
  )
  n <- n0 + n1
  p0 <- n0 / n
  p1 <- n1 / n

  VACC <- data.frame(
    Trial = cell_trial,
    Group = cell_group,
    Vacc = p0^2 + p1^2,
    p0 = p0,
    p1 = p1,
    n0 = n0,
    n1 = n1,
    n = n,
    row.names = paste(cell_trial, cell_group, sep = "_"),
    stringsAsFactors = FALSE
  )

  # Group-level proportion: sum over that group's cells, scaled by 1 / Tr.
  Pcalc <- function(x) {
    (1 / Tr) * sum(x)
  }
  P0 <- tapply(VACC$p0, VACC$Group, Pcalc)
  P1 <- tapply(VACC$p1, VACC$Group, Pcalc)

  Vcon <- mean(P0^2 + P1^2)
  Vacc.total <- mean(tapply(VACC$Vacc, VACC$Group, mean))

  list(
    "all" = VACC,
    "N" = G,
    "P0" = P0,
    "P1" = P1,
    "Vcon" = Vcon * 100,
    "Vacc.total" = Vacc.total * 100
  )
}
## end describe Function

Vacc.Vcon(datas[, 1], datas[, 3], datas[, 2]) # example of how fun works

## data needs to be in matrix form for bootstrap function
## (columns: 1 = protection, 2 = group, 3 = trial)
xdata <- matrix(
  cbind(datas$protection, datas$group, datas$trial),
  ncol = 3,
  byrow = FALSE
)

## function for
## bootstrap

# Statistic wrapper for bootstrap::bootstrap(): `x` is a vector of resampled
# row indices into `xdata` (columns: 1 = protection, 2 = group, 3 = trial).
# Returns the full list produced by Vacc.Vcon() for those rows.
vacc.boot <- function(x, xdata) {
  Vacc.Vcon(xdata[x, 1], xdata[x, 3], xdata[x, 2])
}

bootk <- 10

## --- unstratified resampling with package "bootstrap" ---------------------
# All rows are resampled together, so the number of observations per
# trial-by-group cell is NOT held at 5 (this is the problem shown below).
results <- bootstrap::bootstrap(seq_len(nrow(xdata)), bootk, vacc.boot, xdata)

# One Vacc.Vcon() result per bootstrap replicate.
taccs <- lapply(seq_len(bootk), function(i) results$thetastar[[i]])

# Per-replicate summaries. The original loop overwrote these on every pass,
# so "Booted.means" only ever held the values of the last replicate.
Vaccs <- vapply(taccs, function(r) r$Vacc.total, numeric(1))
Vcons <- vapply(taccs, function(r) r$Vcon, numeric(1))

# Per-cell Vacc (in percent), one column per replicate, aligned on cell name.
cell.names <- sort(rownames(Vacc.Vcon(datas[, 1], datas[, 3], datas[, 2])$all))
G.Vacc <- vapply(
  taccs,
  function(r) {
    v <- r$all$Vacc
    names(v) <- rownames(r$all)
    unname(v[cell.names]) * 100
  },
  numeric(length(cell.names))
)
G.Vacc <- matrix(
  G.Vacc,
  nrow = length(cell.names),
  dimnames = list(cell.names, NULL)
)

# na.rm = TRUE: an unstratified resample can leave a cell empty, which makes
# that replicate's values NaN.
tacc <- list(
  "data" = taccs,
  "Booted.means" = list(
    "Vacc.grouped" = round(rowMeans(G.Vacc, na.rm = TRUE), digits = 3),
    "Vacc.Total" = round(mean(Vaccs, na.rm = TRUE), digits = 3),
    "Vcon.Total" = round(mean(Vcons, na.rm = TRUE), digits = 3)
  )
)
# NOTE(review): the original assigned the value of the `for` loop here, which
# is always NULL; presumably the assembled result was intended - confirm.
boot.amp.vac2 <- tacc

Rep.table <- tacc$Booted.means
Rep.table

## problem area => n should always be 5 in each group as in the original data
# values based on the original data, last column: n = 5
Vacc.Vcon(datas[, 1], datas[, 3], datas[, 2])$all[1:5, ]
# values based on the bootstrapped data: n is not 5!
tacc$data[[2]]$all[1:5, ]
tacc$data[[bootk]]$all[1:5, ]

## --- stratified resampling with package "boot" ----------------------------
# The original calls
#   boot(xdata, Vacc.Vcon, R = bootk, stype = "f", strata = xdata[, 3])
#   boot(xdata, vacc.boot, R = bootk, stype = "f", strata = xdata[, 3])
# did not work at all. I thought that "f" means the strata have to be used as
# frequencies for resampling from this (strata) group. In fact:
#   * boot() calls statistic(data, indices); neither function above has that
#     signature;
#   * the statistic must return a numeric vector, not a list;
#   * `stype` only selects whether the second argument holds indices,
#     frequencies or weights - stratification comes from `strata` alone;
#   * to keep 5 observations per trial AND group, the strata must be the
#     trial-by-group cells, not the trials only.

# Statistic for boot::boot(): `idx` holds the resampled row indices of `data`.
vacc.stat <- function(data, idx) {
  res <- Vacc.Vcon(data[idx, 1], data[idx, 3], data[idx, 2])
  c("Vcon" = res$Vcon, "Vacc.total" = res$Vacc.total)
}

# One stratum per trial-by-group cell; resampling happens within each stratum,
# so every cell keeps its original size.
cell.strata <- interaction(xdata[, 3], xdata[, 2], drop = TRUE)
boot.strat <- boot::boot(xdata, vacc.stat, R = bootk, strata = cell.strata)
boot.strat

## end proc

# Thanks in advance,
# Tom.
#
# Disclaimer: click here
# [[alternative HTML version deleted]]
#
# ______________________________________________
# R-help@r-project.org mailing list
# https://stat.ethz.ch/mailman/listinfo/r-help
# PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
# and provide commented, minimal, self-contained, reproducible code.