Torsten Hothorn <[EMAIL PROTECTED]> writes: > > Dear All, > > > > is there a stratified version of the Wilcoxon test (also known as van > > Elteren test) available in R? > > you can plug it together using the `coin' infrastructure (see the > examples in the manual and vignette).
I managed to dig out our old code, and patched it up loosely to fit versions of R later than 0.62 (the trend test code still seems broken). Use at own risk. The usage is fairly straightforward: > SKruskal.test(y~g1|g2) Kruskal-Wallis stratified rank sum test data: y , group: g1 , strata: g2 K = 3.1486, df = 3, p-value = 0.3693
"SKruskal.test" <- function(formula,trend=F,data=sys.frame(sys.parent()),subset) { trendtest <- (mode(trend)=="logical" && trend) | (mode(trend)=="numeric") deparen <- function(expr) { # Splus code had mode(expr) == "(", which should also work in R 0.62 if(mode(expr) == "call" && expr[[1]] == as.name("(")) expr <- expr[[2]] if(is.recursive(expr)) for(i in seq(along = expr)) expr[[i]] <- deparen(expr[[i]]) expr } # as of now, accept x~g or x~g|s # later: x~g1*g2|s1*s2 (or g1:g2 ?) for crossed factors formula <- deparen(formula) if (mode(formula) != "call") stop("formula is specified incorrectly") if (mode(formula[[3]])=="call") { strat<-T if(formula[[3]][[1]] != as.name("|")) stop("malformed conditioning formula") # The below trick appears to be necessary since terms.formula doesn't # know about '|' formula[[3]][[1]] <- as.name("+") varlist <- attr(terms(formula), "variables") # is all.names safe for all variants of model formulas? var.names <- all.vars(varlist) strata <- eval(varlist[[4]],data) } else if (mode(formula[[3]])=="name") { strat<-F varlist <- attr(terms(formula), "variables") var.names <- all.vars(expr) } # Probably better to eval(varlist,data) and extract afterwards... # Or maybe use model.matrix instead to catch embedded function calls, etc. x <- eval(varlist[[2]],data) group <- eval(varlist[[3]],data) if (length(x) != length(group)) stop("response and group vector must have the same length") if (strat && length(x) != length(strata)) stop("response and stratum vector must have the same length") if (trendtest && mode(trend)=="numeric") { if (length(trend)!= length(table(group))) stop("the number of groups must equal the length of the trend vector") else if (length(unique(trend)) < 2 ) stop("trend must have at least two different levels") } # Does the test below work in R ?? if (trendtest && mode(group)=="character" && !is.factor(group)) stop("group is of mode character and not a factor") if (!strat) strata <- rep(1,length(x)) ok <- complete.cases(x,group,strata) if ( !missing(subset) ) ok <- ok & eval(substitute(subset),data) x <- x[ok] strata <- as.factor(strata[ok]) group <- group[ok] j <- length(unique(group)) # These tests are obsolete - I think... # i <- length(table(strata)) # if (i<2) stop("all observations are in the same stratum") # else if (all(sapply(table(strata),function(x)(x<2))==F)==F) # stop("there must be at least 2 observations in each stratum") # if (j<2) stop("all observations are in the same group") if (!trendtest) group <- as.factor(group) else { # is this logic OK?? if (mode(group)=="character" && all(class(group)!=c("ordered","factor"))) warning("group is not an ordered factor") if (mode(trend)=="logical") group<-as.numeric(group) else group<-trend[as.factor(group)] } r<-numeric(length(x)) r[order(strata)]<-unlist(tapply(x,strata,function(x)rank(x)/length(x))) weight<-tapply(x,strata, function(x)(12/((1+1/length(x))* (1-sum(table(x)^3-table(x))/ (length(x)^3-length(x)) ))))[strata] weight <- as.vector(weight) if(strat) KW.lm <- lm(r ~ strata + group, weights=weight) else KW.lm <- lm(r ~ group, weights=weight) if (!trendtest) { aovtable <- anova(KW.lm) Statistic <- aovtable["group","Sum Sq"] df <- j-1 names(Statistic) <- "K" pvalue <- 1- pchisq(Statistic,df) if (strat){ method <- "Kruskal-Wallis stratified rank sum test" name <- paste(var.names[1],", group: ",var.names[2], ", strata: ",var.names[3]) } else { method <- "Kruskal-Wallis rank sum test" name <- paste(var.names[1],", group: ",var.names[2]) } } else { s <- summary(KW.lm) Statistic <- s$coefficients["group","t Value"] * s$sigma df <- 1 names(Statistic) <- "Z" pvalue <- 2*(1 - pnorm(abs(Statistic))) if (mode(trend)=="numeric") wname <- paste(trend,collapse=" ") else wname <- "as.numeric(group)" if (strat) { method <- "Kruskal-Wallis stratified rank sum trend test" name <- paste(var.names[1], ", group: ",var.names[2], ", strata: ",var.names[3],", trend: ",wname) } else { method <- "Kruskal-Wallis rank sum trend test" name <- paste(var.names[1], ", group: ",var.names[2], ", trend: ",wname) } } names(df) <- "df" RVAL <- list(statistic =Statistic, parameter = df, p.value = pvalue, method = method, data.name = name) class(RVAL) <- "htest" return(RVAL) }
-- O__ ---- Peter Dalgaard Ă˜ster Farimagsgade 5, Entr.B c/ /'_ --- Dept. of Biostatistics PO Box 2099, 1014 Cph. K (*) \(*) -- University of Copenhagen Denmark Ph: (+45) 35327918 ~~~~~~~~~~ - ([EMAIL PROTECTED]) FAX: (+45) 35327907
______________________________________________ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html