Torsten Hothorn <[EMAIL PROTECTED]> writes:

> > Dear All,
> >
> > is there a stratified version of the Wilcoxon test (also known as van
> > Elteren test) available in R?
> 
> you can plug it together using the `coin' infrastructure (see the
> examples in the manual and vignette).

I managed to dig out our old code, and patched it up loosely to fit
versions of R later than 0.62 (the trend test code still seems
broken). Use at own risk. The usage is fairly straightforward:

> SKruskal.test(y~g1|g2)

        Kruskal-Wallis stratified rank sum test

data:  y , group:  g1 , strata:  g2
K = 3.1486, df = 3, p-value = 0.3693


"SKruskal.test" <-
function(formula,trend=F,data=sys.frame(sys.parent()),subset) 
{
  trendtest <- (mode(trend)=="logical" && trend) | (mode(trend)=="numeric")

  deparen <- function(expr)
    {
      # Splus code had mode(expr) == "(", which should also work in R 0.62
      if(mode(expr) == "call" && expr[[1]] == as.name("(")) 
        expr <- expr[[2]]
      if(is.recursive(expr))
        for(i in seq(along = expr))
          expr[[i]] <- deparen(expr[[i]])
      expr
    }

  # as of now, accept x~g or x~g|s
  # later: x~g1*g2|s1*s2 (or g1:g2 ?) for crossed factors
  formula <- deparen(formula)
  if (mode(formula) != "call")
    stop("formula is specified incorrectly")


  if (mode(formula[[3]])=="call") {
    strat<-T
    if(formula[[3]][[1]] != as.name("|"))
      stop("malformed conditioning formula")
    # The below trick appears to be necessary since terms.formula doesn't 
    # know  about '|'
    formula[[3]][[1]] <- as.name("+")
    varlist <- attr(terms(formula), "variables")
    # is all.names safe for all variants of model formulas?
    var.names <- all.vars(varlist)
    strata <- eval(varlist[[4]],data)
  } 
  else if (mode(formula[[3]])=="name") {
    strat<-F
    varlist <- attr(terms(formula), "variables")
    var.names <- all.vars(expr)
  }

  # Probably better to eval(varlist,data) and extract afterwards...
  # Or maybe use model.matrix instead to catch embedded function calls, etc.
  x      <- eval(varlist[[2]],data)
  group  <- eval(varlist[[3]],data)

  if (length(x) != length(group))
    stop("response and group vector must have the same length")
  if (strat && length(x) != length(strata))
    stop("response and stratum vector must have the same length")
 
  if (trendtest && mode(trend)=="numeric") {  
    if (length(trend)!= length(table(group)))
      stop("the number of groups must equal the length of the trend vector")
    else if (length(unique(trend)) < 2 )
      stop("trend must have at least two different levels")
  }

# Does the test below work in R ??
  if (trendtest && mode(group)=="character" && !is.factor(group))
    stop("group is of mode character and not a factor") 

  if (!strat) strata <- rep(1,length(x))

  ok <- complete.cases(x,group,strata)
  if ( !missing(subset) )
    ok <- ok & eval(substitute(subset),data)
  x <- x[ok]
  strata <- as.factor(strata[ok])
  group <- group[ok]

  j <- length(unique(group))

# These tests are obsolete - I think...

#  i <- length(table(strata))

#    if (i<2)  stop("all observations are in the same stratum")
#    else if (all(sapply(table(strata),function(x)(x<2))==F)==F) 
#      stop("there must be at least 2 observations in each stratum")
#    if (j<2)  stop("all observations are in the same group")


  if (!trendtest)     
    group <- as.factor(group) 
  else {
# is this logic OK??
    if (mode(group)=="character" && 
        all(class(group)!=c("ordered","factor")))
      warning("group is not an ordered factor")
    if (mode(trend)=="logical")
      group<-as.numeric(group)
    else 
      group<-trend[as.factor(group)]
  }

  r<-numeric(length(x))
  r[order(strata)]<-unlist(tapply(x,strata,function(x)rank(x)/length(x)))
  weight<-tapply(x,strata,
                 function(x)(12/((1+1/length(x))*
                                 (1-sum(table(x)^3-table(x))/
                                  (length(x)^3-length(x))
                                  ))))[strata]
  weight <- as.vector(weight)

  if(strat)
    KW.lm <- lm(r ~ strata + group, weights=weight)
  else
    KW.lm <- lm(r ~ group, weights=weight)

  if (!trendtest) {
    aovtable <- anova(KW.lm)
    Statistic <- aovtable["group","Sum Sq"]
    df <- j-1 
    names(Statistic) <- "K"
    pvalue <- 1- pchisq(Statistic,df)
    if (strat){ 
      method <- "Kruskal-Wallis stratified rank sum test"
      name   <-  paste(var.names[1],", group: ",var.names[2],
                       ", strata: ",var.names[3])
    } else {
      method <- "Kruskal-Wallis rank sum test"
      name   <-  paste(var.names[1],", group: ",var.names[2])
    }
  }
  else {
    s <- summary(KW.lm)
    Statistic <- s$coefficients["group","t Value"] *
      s$sigma 
    df <- 1    
    names(Statistic) <- "Z"
    pvalue <- 2*(1 - pnorm(abs(Statistic)))
    if (mode(trend)=="numeric")  
      wname <- paste(trend,collapse=" ")
    else 
      wname <- "as.numeric(group)"
    if (strat) {
      method <- "Kruskal-Wallis stratified rank sum trend test"
      name   <-  paste(var.names[1], ", group: ",var.names[2],
                       ", strata: ",var.names[3],", trend: ",wname) 
    } else {
      method <- "Kruskal-Wallis rank sum trend test"
      name   <-  paste(var.names[1], ", group: ",var.names[2],
                       ", trend: ",wname) 
    }
  }

  names(df) <- "df"     

  RVAL <- list(statistic =Statistic, 
               parameter = df,
               p.value = pvalue,
               method = method,
               data.name = name)

  class(RVAL) <- "htest"
        
  return(RVAL)
}


-- 
   O__  ---- Peter Dalgaard             Ă˜ster Farimagsgade 5, Entr.B
  c/ /'_ --- Dept. of Biostatistics     PO Box 2099, 1014 Cph. K
 (*) \(*) -- University of Copenhagen   Denmark          Ph:  (+45) 35327918
~~~~~~~~~~ - ([EMAIL PROTECTED])                  FAX: (+45) 35327907
______________________________________________
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html

Reply via email to