Re: [R] Cochran-Armitage-trend-test

Eric Lecoutre Thu, 28 Jul 2005 08:19:09 -0700

Hi there,

I often do receive some mails about this piece of code regarding
Cochran-Armitage or Mantel Chi square.
The archived mail does unfortunately lack some pieces of code (function
"scores").
I copy there all my raw code that I did implement to mimic SAS PROC FREQ
statistics regarding the analysis of contingency tables. Whoever is
interested to take it and rework it a little bit (for example redefining
outputs so that they suits a htest object) is welcome.


Best wishes,

Eric


-----



# R functions to provides statistics on contingency tables
# Mimics SAS PROC FREQ outputs
# Implementation is the one described in SAS PROC FREQ manual

# Eric Lecoutre <[EMAIL PROTECTED]

# Feel free to use / modify / document / add to a package



#------------------------------------ UTILITARY FUNCTIONS
------------------------------------#


print.ordtest=function(l,...)
{
 tmp=matrix(c(l$estimate,l$ASE),nrow=1)
 dimnames(tmp)=list(l$name,c("Estimate","ASE"))
 print(round(tmp,4),...)
}


compADPQ=function(x)
{
        nr=nrow(x)
        nc=ncol(x)
        Aij=matrix(0,nrow=nr,ncol=nc)
        Dij=matrix(0,nrow=nr,ncol=nc)
        for (i in 1:nr) {
                for (j in 1:nc) {
        
Aij[i,j]=sum(x[1:i,1:j])+sum(x[i:nr,j:nc])-sum(x[i,])-sum(x[,j])
        
Dij[i,j]=sum(x[i:nr,1:j])+sum(x[1:i,j:nc])-sum(x[i,])-sum(x[,j])
        }}
        P=sum(x*Aij)
        Q=sum(x*Dij)
        return(list(Aij=Aij,Dij=Dij,P=P,Q=Q))
}


scores=function(x,MARGIN=1,method="table",...)
{
        # MARGIN
        #       1 - row
        #       2 - columns

        # Methods for ranks are
        #
        # x - default
        # rank
        # ridit
        # modridit
        
        if (method=="table")
        {
                if (is.null(dimnames(x))) return(1:(dim(x)[MARGIN]))
                else {
                        options(warn=-1)
                        if
(sum(is.na(as.numeric(dimnames(x)[[MARGIN]])))>0)
                        {
                                out=(1:(dim(x)[MARGIN]))
                        }
                        else
                        {
                        out=(as.numeric(dimnames(x)[[MARGIN]]))
                        }
                        options(warn=0)
                }
        }
        else    {
        ### method is a rank one
        Ndim=dim(x)[MARGIN]
        OTHERMARGIN=3-MARGIN
        
ranks=c(0,(cumsum(apply(x,MARGIN,sum))))[1:Ndim]+(apply(x,MARGIN,sum)+1)
/2
        if (method=="ranks") out=ranks
        if (method=="ridit") out=ranks/(sum(x))
        if (method=="modridit") out=ranks/(sum(x)+1)
        }
        
        return(out)
}


#------------------------------------ FUNCTIONS
------------------------------------#

tablegamma=function(x)
{
# Statistic
        tmp=compADPQ(x)
        P=tmp$P
        Q=tmp$Q
        gamma=(P-Q)/(P+Q)
# ASE
        Aij=tmp$Aij
        Dij=tmp$Dij
        tmp1=4/(P+Q)^2
        tmp2=sqrt(sum((Q*Aij - P*Dij)^2 * x))
        gamma.ASE=tmp1*tmp2
# Test  
        var0=(4/(P+Q)^2) * (sum(x*(Aij-Dij)^2) - ((P-Q)^2)/sum(x))
        tb=gamma/sqrt(var0)
        p.value=2*(1-pnorm(tb))
# Output
        
out=list(estimate=gamma,ASE=gamma.ASE,statistic=tb,p.value=p.value,name=
"Gamma",bornes=c(-1,1))
        class(out)="ordtest"
        return(out)
}


tabletauc=function(x)
{
        tmp=compADPQ(x)
        P=tmp$P
        Q=tmp$Q
        m=min(dim(x))
        n=sum(x)
# statistic
        
        tauc=(m*(P-Q))/(n^2*(m-1))
# ASE   
        Aij=tmp$Aij
        Dij=tmp$Dij
        dij=Aij-Dij
        tmp1=2*m/((m-1)*n^2)
        tmp2= sum(x * dij^2) - (P-Q)^2/n
        ASE=tmp1*sqrt(tmp2)
        
# Test  
        tb=tauc/ASE
        p.value=2*(1-pnorm(tb))
# Output
        
out=list(estimate=tauc,ASE=ASE,statistic=tb,p.value=p.value,name="Kendal
l's tau-c",bornes=c(-1,1))
        class(out)="ordtest"
        return(out)
}

tabletaub=function(x)
{
# Statistic
        tmp=compADPQ(x)
        P=tmp$P
        Q=tmp$Q
        n=sum(x)
        wr=n^2 - sum(apply(x,1,sum)^2)
        wc=n^2 - sum(apply(x,2,sum)^2)
        taub=(P-Q)/sqrt(wr*wc)  
# ASE
        Aij=tmp$Aij
        Dij=tmp$Dij
        w=sqrt(wr*wc)
        dij=Aij-Dij
        nidot=apply(x,1,sum)
        ndotj=apply(x,2,sum)
        n=sum(x)
        vij=outer(nidot,ndotj, FUN=function(a,b) return(a*wc+b*wr))
        tmp1=1/(w^2)
        tmp2= sum(x*(2*w*dij + taub*vij)^2)
        tmp3=n^3*taub^2*(wr+wc)^2
        tmp4=sqrt(tmp2-tmp3)
        taub.ASE=tmp1*tmp4
# Test
        var0=4/(wr*wc) * (sum(x*(Aij-Dij)^2) - (P-Q)^2/n)
        tb=taub/sqrt(var0)
        p.value=2*(1-pnorm(tb))
# Output        
        
out=list(estimate=taub,ASE=taub.ASE,statistic=tb,p.value=p.value,name="K
endall's tau-b",bornes=c(-1,1))
        class(out="ordtest")
        return(out)
}

tablesomersD=function(x,dep=2)
{
        # dep: which dimension stands for the dependant variable
        # 1 - ROWS
        # 2 - COLS
# Statistic
        if (dep==1) x=t(x)
        tmp=compADPQ(x)
        P=tmp$P
        Q=tmp$Q
        n=sum(x)
        wr=n^2 - sum(apply(x,1,sum)^2)
        somers=(P-Q)/wr
# ASE
        Aij=tmp$Aij
        Dij=tmp$Dij
        dij=Aij-Dij
        tmp1=2/wr^2
        tmp2=sum(x*(wr*dij - (P-Q)*(n-apply(x,1,sum)))^2)
        ASE=tmp1*sqrt(tmp2)
# Test
        var0=4/(wr^2) * (sum(x*(Aij-Dij)^2) - (P-Q)^2/n)
        tb=somers/sqrt(var0)
        p.value=2*(1-pnorm(tb))
# Output
        if (dep==1) dir="R|C" else dir= "C|R"
        name=paste("Somer's D",dir)
        
out=list(estimate=somers,ASE=ASE,statistic=tb,p.value=p.value,name=name,
bornes=c(-1,1))
        class(out)="ordtest"
        return(out)

        
}

#out=table.somersD(data)

 

tablepearson=function(x,scores.type="table")
{

# Statistic
        sR=scores(x,1,scores.type)
        sC=scores(x,2,scores.type)
        n=sum(x)
        Rbar=sum(apply(x,1,sum)*sR)/n
        Cbar=sum(apply(x,2,sum)*sC)/n
        ssr=sum(x*(sR-Rbar)^2)
        ssc=sum(t(x)* (sC-Cbar)^2)
        tmpij=outer(sR,sC,FUN=function(a,b) return((a-Rbar)*(b-Cbar)))
        ssrc= sum(x*tmpij)
        v=ssrc
        w=sqrt(ssr*ssc)
        r=v/w
# ASE
        bij=outer(sR,sC, FUN=function(a,b)return((a-Rbar)^2*ssc +
(b-Cbar)^2*ssr))        
        tmp1=1/w^2
        tmp2=x*(w*tmpij - (bij*v)/(2*w))^2
        tmp3=sum(tmp2)
        ASE=tmp1*sqrt(tmp3)
# Test
        var0= (sum(x*tmpij) - (ssrc^2/n))/ (ssr*ssc)
        tb=r/sqrt(var0)
        p.value=2*(1-pnorm(tb))
# Output
        
out=list(estimate=r,ASE=ASE,statistic=tb,p.value=p.value,name="Pearson
Correlation",bornes=c(-1,1))
        class(out)="ordtest"
        return(out)
}

# table.pearson(data)


tablespearman=function(x)
{
        # Details algorithme manuel SAS PROC FREQ page 540
# Statistic
        n=sum(x)
        nr=nrow(x)
        nc=ncol(x)
        tmpd=cbind(expand.grid(1:nr,1:nc))
        ind=rep(1:(nr*nc),as.vector(x))
        tmp=tmpd[ind,]
        rhos=cor(apply(tmp,2,rank))[1,2]
# ASE
        Ri=scores(x,1,"ranks")- n/2
        Ci=scores(x,2,"ranks")- n/2
        sr=apply(x,1,sum)
        sc=apply(x,2,sum)
        F=n^3 - sum(sr^3)
        G=n^3 - sum(sc^3)
        w=(1/12)*sqrt(F*G)
        vij=data
        for (i in 1:nrow(x))
        {
                qi=0
                if (i<nrow(x))
                {
                for (k in i:nrow(x)) qi=qi+sum(x[k,]*Ci)
                }
        }
        for (j in 1:ncol(x))
                {
                        qj=0
                        if (j<ncol(x))
                        {
                        for (k in j:ncol(x)) qj=qj+sum(x[,k]*Ri)
                        }
                vij[i,j]=n*(Ri[i]*Ci[j] +
0.5*sum(x[i,]*Ci)+0.5*sum(data[,j]*Ri) +qi+qj)
                }


        v=sum(data*outer(Ri,Ci))
        wij=-n/(96*w)*outer(sr,sc,FUN=function(a,b) return(a^2*G+b^2*F))
        zij=w*vij-v*wij
        zbar=sum(data*zij)/n
        vard=(1/(n^2*w^4))*sum(x*(zij-zbar)^2)
        ASE=sqrt(vard)          
# Test
        vbar=sum(x*vij)/n
        p1=sum(x*(vij-vbar)^2)
        p2=n^2*w^2
        var0=p1/p2
        stat=rhos/sqrt(var0)

# Output
        out=list(estimate=rhos,ASE=ASE,name="Spearman
correlation",bornes=c(-1,1))
        class(out)="ordtest"
        return(out)
}

#tablespearman(data)



 
tablelambdasym=function(x)
{
# Statistic 
        ri = apply(x,1,max)
        r=max(apply(x,2,sum))
        n=sum(x)
        cj=apply(x,2,max)
        c=max(apply(x,1,sum))
        sri=sum(ri)
        w=2*n - r -c
        v=2*n - sri - sum(cj)
        lambda=(w-v)/w
# ASE ...

        tmpSi=0
        l=min(which(apply(x,2,sum)==r))
        for (i in 1:length(ri))
        {
                li=min(which(x[i,]==ri[i]))
                if (li==l) tmpSi=tmpSi+x[i,li]
        }
        
        tmpSj=0
        k=min(which(apply(x,1,sum)==c))
        for (j in 1:length(cj))
        {
                kj=min(which(x[,j]==cj[j]))
                if (kj==k) tmpSj=tmpSj+x[kj,j]
        }

        rk=max(x[k,])
        cl=max(x[,l])
        tmpx=tmpSi+tmpSj+rk+cl
        y=8*n-w-v-2*tmpx
        
        nkl=x[k,l]
        tmpSij=0
        for (i in 1:nrow(x))
        {
                for (j in 1:ncol(x))
                {
                li=min(which(x[i,]==ri[i]))
                kj=min(which(x[,j]==cj[j]))
                tmpSij=tmpSij+x[kj,li]
                }
        }
        
        
        ASE=(1/(w^2))*sqrt(w*v*y-(2*(w^2)*(n-tmpSij))-2*(v^2)*(n-nkl))
# Output        
        
        out=list(estimate=lambda,ASE=ASE,name="Lambda
Symetric",bornes=c(0,1))
        class(out)="ordtest"
        return(out)

}
#tablelambdasym(data)

tablelambdaasym=function(x,transpose=FALSE)
{
# Statistic
        if (transpose==TRUE) x=t(x)
        ri = apply(x,1,max)
        r=max(apply(x,2,sum))
        sri=sum(ri)
        n=sum(x)
        lambda=(sum(ri)-r)/(n-r)
# ASE   
        l=min(which(apply(x,2,sum)==r))
        tmp=0
        for (i in 1:length(ri))
        {
                li=min(which(x[i,]==ri[i]))
                if (li==l) tmp=tmp+x[i,li]
        }
        ASE=sqrt(((n-sri)/(n-r)^3) *(sri+r-2*tmp))
# Output
        if (transpose) dir="R|C" else dir= "C|R"
        name=paste("Lambda asymetric",dir)
        out=list(estimate=lambda,ASE=ASE,name=name,bornes=c(0,1))
        class(out)="ordtest"
        return(out)
}




tableUCA=function(x,transpose=TRUE)
{
        if (transpose==TRUE) x=t(x)
        # Statistic
                n=sum(x)
                ni=apply(x,1,sum)
                nj=apply(x,2,sum)
                Hx=-sum((ni/n)*log(ni/n))
                Hy=-sum((nj/n)*log(nj/n))
                Hxy=-sum((x/n)*log(x/n))
                v=Hx+Hy- Hxy
                w=Hy
                U=v/w
        # ASE
                tmp1=1/((n)*(w^2))
                tmpij=0
                for (i in 1:nrow(x))
                {
                        for (j in 1:ncol(x))
                        {
                        tmpij=tmpij+(  x[i,j]*  (
Hy*log(x[i,j]/ni[i])+(Hx-Hxy)*    log(nj[j]/n)               )^2     )
                        }
                }
                ASE=tmp1*sqrt(tmpij)
        # Output
                if (transpose) dir="R|C" else dir= "C|R"
                name=paste("Uncertainty Coefficient",dir)
                out=list(estimate=U,ASE=ASE,name=name,bornes=c(0,1))
                class(out)="ordtest"
                return(out)
}

#tableUCA(data)

tableUCS=function(x)
{

        # Statistic
                n=sum(x)
                ni=apply(x,1,sum)
                nj=apply(x,2,sum)
                Hx=-sum((ni/n)*log(ni/n))
                Hy=-sum((nj/n)*log(nj/n))
                Hxy=-sum((x/n)*log(x/n))
                U=(2*(Hx+Hy-Hxy))/(Hx+Hy)
        # ASE
                tmpij=0
                for (i in 1:nrow(x))
                {
                        for (j in 1:ncol(x))
                        {
                        tmpij=tmpij+(  x[i,j]*
(Hxy*log(ni[i]*nj[j]/n^2) - (Hx+Hy)*log(x[i,j]/n))^2   /(n^2*(Hx+Hy)^4)
)
                        }
                }
                ASE=2*sqrt(tmpij)
        # Output
                name="Uncertainty Coefficient Symetric"
                out=list(estimate=U,ASE=ASE,name=name,bornes=c(0,1))
                class(out)="ordtest"
                return(out)
}





tablelinear=function(x,scores.type="table")
{
        r=tablepearson(x,scores.type)$estimate
        n=sum(x)
        ll=r^2*(n-1)
        out=list(estimate=ll)
        return(out)
}

tablephi=function(x)
{
        if (all.equal(dim(x),c(2,2))==TRUE)
        {
        rtot=apply(x,1,sum)
        ctot=apply(x,2,sum)
        phi= det(x)/sqrt(prod(rtot)*prod(ctot))
        }
        else {
        Qp=chisq.test(x)$statistic
        phi=sqrt(Qp/sum(x))
        }
        names(phi)="phi"
        return(phi=phi)
}


tableCramerV=function(x)
{
        if (all.equal(dim(x),c(2,2))==TRUE)
        {
        cramerV=tablephi(x)
        }
        else
        {
        Qp=tableChisq(x)$estimate
        cramerV=sqrt((Qp/n)/min(dim(x)-1))
        }
        names(cramerV)="Cramer's V"
        return(cramerV)
}


tableChisq=function(x)
{
        nidot=apply(x,1,sum)
        ndotj=apply(x,2,sum)
        n=sum(nidot)
        eij=outer(nidot,ndotj,"*")/n
        R=length(nidot)
        C=length(ndotj)
        dll=(R-1)*(C-1)
        Qp=sum((x-eij)^2/eij)
        p.value=1-pchisq(Qp,dll)
        
out=list(estimate=Qp,dll=dll,p.value=p.value,dim=c(R,C),name="Pearson's
Chi-square")
        return(out)
}


tableChisqLR=function(x)
{
# Likelihood ratio Chi-squared test
        nidot=apply(x,1,sum)
        ndotj=apply(x,2,sum)
        n=sum(nidot)
        eij=outer(nidot,ndotj,"*")/n
        R=length(nidot)
        C=length(ndotj)
        dll=(R-1)*(C-1)
        G2=2*sum(x*log(x/eij))
        p.value=1-pchisq(G2,dll)
        
out=list(estimate=G2,dll=dll,p.value=p.value,dim=c(R,C),name="Likelihood
ratio Chi-square")
        return(out)
}

tableChisqCA=function(x)
{
        if (all.equal(dim(x),c(2,2))==TRUE)
        {
                nidot=apply(x,1,sum)
                ndotj=apply(x,2,sum)
                n=sum(nidot)
                eij=outer(nidot,ndotj,"*")/n
                R=length(nidot)
                C=length(ndotj)
                dll=(R-1)*(C-1)
                tmp=as.vector(abs(x-eij))
                tmp=pmax(tmp-0.5,0)
                tmp=matrix(tmp,byrow=TRUE,ncol=C)
                Qc=sum(tmp^2/eij)
                p.value=1-pchisq(Qc,dll)
        
out=list(estimate=Qc,dll=dll,p.value=p.value,dim=c(R,C),name="Continuity
adjusted Chi-square")
                return(out)
        }
        else
        { stop("Continuity-adjusted chi-square must be used with
(2,2)-tables",call.=FALSE) }
}

tableChisqMH=function(x)
{
        n=sum(x)
        G2=(n-1)*(tablepearson(x)$estimate^2)
        dll=1
        p.value=1-pchisq(G2,dll)
        
out=list(estimate=G2,dll=dll,p.value=p.value,dim=dim(x),name="Mantel-Hae
nszel Chi-square")
        return(out)

}

tableCC=function(x)
{
        Qp=tableChisq(x)$estimate
        n=sum(x)
        P=sqrt(Qp/(Qp+n))
        m=min(dim(x))
        
out=list(estimate=P,dim=dim(x),bornes=c(0,sqrt((m-1)/m)),name="Contingen
cy coefficient")
        return(out)
        
}

tabletrend=function(x,transpose=FALSE)
{
        if (any(dim(x)==2))
        {
        if (transpose==TRUE) {
        x=t(x)
        }
        
        if (dim(x)[2]!=2){stop("Cochran-Armitage test for trend must be
used with a (R,2) table. Use transpose argument",call.=FALSE) }
        
        nidot=apply(x,1,sum)
        n=sum(nidot)

        Ri=scores(x,1,"table")
        Rbar=sum(nidot*Ri)/n
        
        s2=sum(nidot*(Ri-Rbar)^2)
        pdot1=sum(x[,1])/n
        T=sum(x[,1]*(Ri-Rbar))/sqrt(pdot1*(1-pdot1)*s2)
        p.value.uni=1-pnorm(abs(T))
        p.value.bi=2*p.value.uni
        
out=list(estimate=T,dim=dim(x),p.value.uni=p.value.uni,p.value.bi=p.valu
e.bi,name="Cochran-Armitage test for trend")
        return(out)
        
        }
        else {stop("Cochran-Armitage test for trend must be used with a
(2,C) or a (R,2) table",call.=FALSE) }
}











Eric Lecoutre
UCL /  Institut de Statistique
Voie du Roman Pays, 20
1348 Louvain-la-Neuve
Belgium

tel: (+32)(0)10473050
[EMAIL PROTECTED]
http://www.stat.ucl.ac.be/ISpersonnel/lecoutre

If the statistics are boring, then you've got the wrong numbers. -Edward
Tufte   


> -----Original Message-----
> From: [EMAIL PROTECTED] 
> [mailto:[EMAIL PROTECTED] On Behalf Of Vito Ricci
> Sent: jeudi 28 juillet 2005 16:30
> To: r-help@stat.math.ethz.ch
> Cc: [EMAIL PROTECTED]
> Subject: Re: [R] Cochran-Armitage-trend-test
> 
> 
> Hi,
> see:
> http://finzi.psych.upenn.edu/R/Rhelp02a/archive/20396.html
> 
> Regards,
> Vito
> 
> 
> 
> [EMAIL PROTECTED] wrote:
> 
> Hi!
> 
> I am searching for the Cochran-Armitage-trend-test. Is
> it included in an
> R-package? 
> 
> Thank you!
> 
> 
> 
> Diventare costruttori di soluzioni
> Became solutions' constructors
> 
> "The business of the statistician is to catalyze 
> the scientific learning process."  
> George E. P. Box
> 
> "Statistical thinking will one day be as necessary for 
> efficient citizenship as the ability to read and write" H. G. Wells
> 
> Top 10 reasons to become a Statistician
> 
>      1. Deviation is considered normal
>      2. We feel complete and sufficient
>      3. We are 'mean' lovers
>      4. Statisticians do it discretely and continuously
>      5. We are right 95% of the time
>      6. We can legally comment on someone's posterior distribution
>      7. We may not be normal, but we are transformable
>      8. We never have to say we are certain
>      9. We are honestly significantly different
>     10. No one wants our jobs
> 
> 
> Visitate il portale http://www.modugno.it/
> e in particolare la sezione su Palese  
> http://www.modugno.it/archivio/palesesanto_spirito/
> 
> ______________________________________________
> R-help@stat.math.ethz.ch mailing list 
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide! 
> http://www.R-project.org/posting-guide.html
>

______________________________________________
R-help@stat.math.ethz.ch mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html

Re: [R] Cochran-Armitage-trend-test

Reply via email to