Hi there, I often do receive some mails about this piece of code regarding Cochran-Armitage or Mantel Chi square. The archived mail does unfortunately lack some pieces of code (function "scores"). I copy there all my raw code that I did implement to mimic SAS PROC FREQ statistics regarding the analysis of contingency tables. Whoever is interested to take it and rework it a little bit (for example redefining outputs so that they suits a htest object) is welcome.
Best wishes, Eric ----- # R functions to provides statistics on contingency tables # Mimics SAS PROC FREQ outputs # Implementation is the one described in SAS PROC FREQ manual # Eric Lecoutre <[EMAIL PROTECTED] # Feel free to use / modify / document / add to a package #------------------------------------ UTILITARY FUNCTIONS ------------------------------------# print.ordtest=function(l,...) { tmp=matrix(c(l$estimate,l$ASE),nrow=1) dimnames(tmp)=list(l$name,c("Estimate","ASE")) print(round(tmp,4),...) } compADPQ=function(x) { nr=nrow(x) nc=ncol(x) Aij=matrix(0,nrow=nr,ncol=nc) Dij=matrix(0,nrow=nr,ncol=nc) for (i in 1:nr) { for (j in 1:nc) { Aij[i,j]=sum(x[1:i,1:j])+sum(x[i:nr,j:nc])-sum(x[i,])-sum(x[,j]) Dij[i,j]=sum(x[i:nr,1:j])+sum(x[1:i,j:nc])-sum(x[i,])-sum(x[,j]) }} P=sum(x*Aij) Q=sum(x*Dij) return(list(Aij=Aij,Dij=Dij,P=P,Q=Q)) } scores=function(x,MARGIN=1,method="table",...) { # MARGIN # 1 - row # 2 - columns # Methods for ranks are # # x - default # rank # ridit # modridit if (method=="table") { if (is.null(dimnames(x))) return(1:(dim(x)[MARGIN])) else { options(warn=-1) if (sum(is.na(as.numeric(dimnames(x)[[MARGIN]])))>0) { out=(1:(dim(x)[MARGIN])) } else { out=(as.numeric(dimnames(x)[[MARGIN]])) } options(warn=0) } } else { ### method is a rank one Ndim=dim(x)[MARGIN] OTHERMARGIN=3-MARGIN ranks=c(0,(cumsum(apply(x,MARGIN,sum))))[1:Ndim]+(apply(x,MARGIN,sum)+1) /2 if (method=="ranks") out=ranks if (method=="ridit") out=ranks/(sum(x)) if (method=="modridit") out=ranks/(sum(x)+1) } return(out) } #------------------------------------ FUNCTIONS ------------------------------------# tablegamma=function(x) { # Statistic tmp=compADPQ(x) P=tmp$P Q=tmp$Q gamma=(P-Q)/(P+Q) # ASE Aij=tmp$Aij Dij=tmp$Dij tmp1=4/(P+Q)^2 tmp2=sqrt(sum((Q*Aij - P*Dij)^2 * x)) gamma.ASE=tmp1*tmp2 # Test var0=(4/(P+Q)^2) * (sum(x*(Aij-Dij)^2) - ((P-Q)^2)/sum(x)) tb=gamma/sqrt(var0) p.value=2*(1-pnorm(tb)) # Output out=list(estimate=gamma,ASE=gamma.ASE,statistic=tb,p.value=p.value,name= "Gamma",bornes=c(-1,1)) class(out)="ordtest" return(out) } tabletauc=function(x) { tmp=compADPQ(x) P=tmp$P Q=tmp$Q m=min(dim(x)) n=sum(x) # statistic tauc=(m*(P-Q))/(n^2*(m-1)) # ASE Aij=tmp$Aij Dij=tmp$Dij dij=Aij-Dij tmp1=2*m/((m-1)*n^2) tmp2= sum(x * dij^2) - (P-Q)^2/n ASE=tmp1*sqrt(tmp2) # Test tb=tauc/ASE p.value=2*(1-pnorm(tb)) # Output out=list(estimate=tauc,ASE=ASE,statistic=tb,p.value=p.value,name="Kendal l's tau-c",bornes=c(-1,1)) class(out)="ordtest" return(out) } tabletaub=function(x) { # Statistic tmp=compADPQ(x) P=tmp$P Q=tmp$Q n=sum(x) wr=n^2 - sum(apply(x,1,sum)^2) wc=n^2 - sum(apply(x,2,sum)^2) taub=(P-Q)/sqrt(wr*wc) # ASE Aij=tmp$Aij Dij=tmp$Dij w=sqrt(wr*wc) dij=Aij-Dij nidot=apply(x,1,sum) ndotj=apply(x,2,sum) n=sum(x) vij=outer(nidot,ndotj, FUN=function(a,b) return(a*wc+b*wr)) tmp1=1/(w^2) tmp2= sum(x*(2*w*dij + taub*vij)^2) tmp3=n^3*taub^2*(wr+wc)^2 tmp4=sqrt(tmp2-tmp3) taub.ASE=tmp1*tmp4 # Test var0=4/(wr*wc) * (sum(x*(Aij-Dij)^2) - (P-Q)^2/n) tb=taub/sqrt(var0) p.value=2*(1-pnorm(tb)) # Output out=list(estimate=taub,ASE=taub.ASE,statistic=tb,p.value=p.value,name="K endall's tau-b",bornes=c(-1,1)) class(out="ordtest") return(out) } tablesomersD=function(x,dep=2) { # dep: which dimension stands for the dependant variable # 1 - ROWS # 2 - COLS # Statistic if (dep==1) x=t(x) tmp=compADPQ(x) P=tmp$P Q=tmp$Q n=sum(x) wr=n^2 - sum(apply(x,1,sum)^2) somers=(P-Q)/wr # ASE Aij=tmp$Aij Dij=tmp$Dij dij=Aij-Dij tmp1=2/wr^2 tmp2=sum(x*(wr*dij - (P-Q)*(n-apply(x,1,sum)))^2) ASE=tmp1*sqrt(tmp2) # Test var0=4/(wr^2) * (sum(x*(Aij-Dij)^2) - (P-Q)^2/n) tb=somers/sqrt(var0) p.value=2*(1-pnorm(tb)) # Output if (dep==1) dir="R|C" else dir= "C|R" name=paste("Somer's D",dir) out=list(estimate=somers,ASE=ASE,statistic=tb,p.value=p.value,name=name, bornes=c(-1,1)) class(out)="ordtest" return(out) } #out=table.somersD(data) tablepearson=function(x,scores.type="table") { # Statistic sR=scores(x,1,scores.type) sC=scores(x,2,scores.type) n=sum(x) Rbar=sum(apply(x,1,sum)*sR)/n Cbar=sum(apply(x,2,sum)*sC)/n ssr=sum(x*(sR-Rbar)^2) ssc=sum(t(x)* (sC-Cbar)^2) tmpij=outer(sR,sC,FUN=function(a,b) return((a-Rbar)*(b-Cbar))) ssrc= sum(x*tmpij) v=ssrc w=sqrt(ssr*ssc) r=v/w # ASE bij=outer(sR,sC, FUN=function(a,b)return((a-Rbar)^2*ssc + (b-Cbar)^2*ssr)) tmp1=1/w^2 tmp2=x*(w*tmpij - (bij*v)/(2*w))^2 tmp3=sum(tmp2) ASE=tmp1*sqrt(tmp3) # Test var0= (sum(x*tmpij) - (ssrc^2/n))/ (ssr*ssc) tb=r/sqrt(var0) p.value=2*(1-pnorm(tb)) # Output out=list(estimate=r,ASE=ASE,statistic=tb,p.value=p.value,name="Pearson Correlation",bornes=c(-1,1)) class(out)="ordtest" return(out) } # table.pearson(data) tablespearman=function(x) { # Details algorithme manuel SAS PROC FREQ page 540 # Statistic n=sum(x) nr=nrow(x) nc=ncol(x) tmpd=cbind(expand.grid(1:nr,1:nc)) ind=rep(1:(nr*nc),as.vector(x)) tmp=tmpd[ind,] rhos=cor(apply(tmp,2,rank))[1,2] # ASE Ri=scores(x,1,"ranks")- n/2 Ci=scores(x,2,"ranks")- n/2 sr=apply(x,1,sum) sc=apply(x,2,sum) F=n^3 - sum(sr^3) G=n^3 - sum(sc^3) w=(1/12)*sqrt(F*G) vij=data for (i in 1:nrow(x)) { qi=0 if (i<nrow(x)) { for (k in i:nrow(x)) qi=qi+sum(x[k,]*Ci) } } for (j in 1:ncol(x)) { qj=0 if (j<ncol(x)) { for (k in j:ncol(x)) qj=qj+sum(x[,k]*Ri) } vij[i,j]=n*(Ri[i]*Ci[j] + 0.5*sum(x[i,]*Ci)+0.5*sum(data[,j]*Ri) +qi+qj) } v=sum(data*outer(Ri,Ci)) wij=-n/(96*w)*outer(sr,sc,FUN=function(a,b) return(a^2*G+b^2*F)) zij=w*vij-v*wij zbar=sum(data*zij)/n vard=(1/(n^2*w^4))*sum(x*(zij-zbar)^2) ASE=sqrt(vard) # Test vbar=sum(x*vij)/n p1=sum(x*(vij-vbar)^2) p2=n^2*w^2 var0=p1/p2 stat=rhos/sqrt(var0) # Output out=list(estimate=rhos,ASE=ASE,name="Spearman correlation",bornes=c(-1,1)) class(out)="ordtest" return(out) } #tablespearman(data) tablelambdasym=function(x) { # Statistic ri = apply(x,1,max) r=max(apply(x,2,sum)) n=sum(x) cj=apply(x,2,max) c=max(apply(x,1,sum)) sri=sum(ri) w=2*n - r -c v=2*n - sri - sum(cj) lambda=(w-v)/w # ASE ... tmpSi=0 l=min(which(apply(x,2,sum)==r)) for (i in 1:length(ri)) { li=min(which(x[i,]==ri[i])) if (li==l) tmpSi=tmpSi+x[i,li] } tmpSj=0 k=min(which(apply(x,1,sum)==c)) for (j in 1:length(cj)) { kj=min(which(x[,j]==cj[j])) if (kj==k) tmpSj=tmpSj+x[kj,j] } rk=max(x[k,]) cl=max(x[,l]) tmpx=tmpSi+tmpSj+rk+cl y=8*n-w-v-2*tmpx nkl=x[k,l] tmpSij=0 for (i in 1:nrow(x)) { for (j in 1:ncol(x)) { li=min(which(x[i,]==ri[i])) kj=min(which(x[,j]==cj[j])) tmpSij=tmpSij+x[kj,li] } } ASE=(1/(w^2))*sqrt(w*v*y-(2*(w^2)*(n-tmpSij))-2*(v^2)*(n-nkl)) # Output out=list(estimate=lambda,ASE=ASE,name="Lambda Symetric",bornes=c(0,1)) class(out)="ordtest" return(out) } #tablelambdasym(data) tablelambdaasym=function(x,transpose=FALSE) { # Statistic if (transpose==TRUE) x=t(x) ri = apply(x,1,max) r=max(apply(x,2,sum)) sri=sum(ri) n=sum(x) lambda=(sum(ri)-r)/(n-r) # ASE l=min(which(apply(x,2,sum)==r)) tmp=0 for (i in 1:length(ri)) { li=min(which(x[i,]==ri[i])) if (li==l) tmp=tmp+x[i,li] } ASE=sqrt(((n-sri)/(n-r)^3) *(sri+r-2*tmp)) # Output if (transpose) dir="R|C" else dir= "C|R" name=paste("Lambda asymetric",dir) out=list(estimate=lambda,ASE=ASE,name=name,bornes=c(0,1)) class(out)="ordtest" return(out) } tableUCA=function(x,transpose=TRUE) { if (transpose==TRUE) x=t(x) # Statistic n=sum(x) ni=apply(x,1,sum) nj=apply(x,2,sum) Hx=-sum((ni/n)*log(ni/n)) Hy=-sum((nj/n)*log(nj/n)) Hxy=-sum((x/n)*log(x/n)) v=Hx+Hy- Hxy w=Hy U=v/w # ASE tmp1=1/((n)*(w^2)) tmpij=0 for (i in 1:nrow(x)) { for (j in 1:ncol(x)) { tmpij=tmpij+( x[i,j]* ( Hy*log(x[i,j]/ni[i])+(Hx-Hxy)* log(nj[j]/n) )^2 ) } } ASE=tmp1*sqrt(tmpij) # Output if (transpose) dir="R|C" else dir= "C|R" name=paste("Uncertainty Coefficient",dir) out=list(estimate=U,ASE=ASE,name=name,bornes=c(0,1)) class(out)="ordtest" return(out) } #tableUCA(data) tableUCS=function(x) { # Statistic n=sum(x) ni=apply(x,1,sum) nj=apply(x,2,sum) Hx=-sum((ni/n)*log(ni/n)) Hy=-sum((nj/n)*log(nj/n)) Hxy=-sum((x/n)*log(x/n)) U=(2*(Hx+Hy-Hxy))/(Hx+Hy) # ASE tmpij=0 for (i in 1:nrow(x)) { for (j in 1:ncol(x)) { tmpij=tmpij+( x[i,j]* (Hxy*log(ni[i]*nj[j]/n^2) - (Hx+Hy)*log(x[i,j]/n))^2 /(n^2*(Hx+Hy)^4) ) } } ASE=2*sqrt(tmpij) # Output name="Uncertainty Coefficient Symetric" out=list(estimate=U,ASE=ASE,name=name,bornes=c(0,1)) class(out)="ordtest" return(out) } tablelinear=function(x,scores.type="table") { r=tablepearson(x,scores.type)$estimate n=sum(x) ll=r^2*(n-1) out=list(estimate=ll) return(out) } tablephi=function(x) { if (all.equal(dim(x),c(2,2))==TRUE) { rtot=apply(x,1,sum) ctot=apply(x,2,sum) phi= det(x)/sqrt(prod(rtot)*prod(ctot)) } else { Qp=chisq.test(x)$statistic phi=sqrt(Qp/sum(x)) } names(phi)="phi" return(phi=phi) } tableCramerV=function(x) { if (all.equal(dim(x),c(2,2))==TRUE) { cramerV=tablephi(x) } else { Qp=tableChisq(x)$estimate cramerV=sqrt((Qp/n)/min(dim(x)-1)) } names(cramerV)="Cramer's V" return(cramerV) } tableChisq=function(x) { nidot=apply(x,1,sum) ndotj=apply(x,2,sum) n=sum(nidot) eij=outer(nidot,ndotj,"*")/n R=length(nidot) C=length(ndotj) dll=(R-1)*(C-1) Qp=sum((x-eij)^2/eij) p.value=1-pchisq(Qp,dll) out=list(estimate=Qp,dll=dll,p.value=p.value,dim=c(R,C),name="Pearson's Chi-square") return(out) } tableChisqLR=function(x) { # Likelihood ratio Chi-squared test nidot=apply(x,1,sum) ndotj=apply(x,2,sum) n=sum(nidot) eij=outer(nidot,ndotj,"*")/n R=length(nidot) C=length(ndotj) dll=(R-1)*(C-1) G2=2*sum(x*log(x/eij)) p.value=1-pchisq(G2,dll) out=list(estimate=G2,dll=dll,p.value=p.value,dim=c(R,C),name="Likelihood ratio Chi-square") return(out) } tableChisqCA=function(x) { if (all.equal(dim(x),c(2,2))==TRUE) { nidot=apply(x,1,sum) ndotj=apply(x,2,sum) n=sum(nidot) eij=outer(nidot,ndotj,"*")/n R=length(nidot) C=length(ndotj) dll=(R-1)*(C-1) tmp=as.vector(abs(x-eij)) tmp=pmax(tmp-0.5,0) tmp=matrix(tmp,byrow=TRUE,ncol=C) Qc=sum(tmp^2/eij) p.value=1-pchisq(Qc,dll) out=list(estimate=Qc,dll=dll,p.value=p.value,dim=c(R,C),name="Continuity adjusted Chi-square") return(out) } else { stop("Continuity-adjusted chi-square must be used with (2,2)-tables",call.=FALSE) } } tableChisqMH=function(x) { n=sum(x) G2=(n-1)*(tablepearson(x)$estimate^2) dll=1 p.value=1-pchisq(G2,dll) out=list(estimate=G2,dll=dll,p.value=p.value,dim=dim(x),name="Mantel-Hae nszel Chi-square") return(out) } tableCC=function(x) { Qp=tableChisq(x)$estimate n=sum(x) P=sqrt(Qp/(Qp+n)) m=min(dim(x)) out=list(estimate=P,dim=dim(x),bornes=c(0,sqrt((m-1)/m)),name="Contingen cy coefficient") return(out) } tabletrend=function(x,transpose=FALSE) { if (any(dim(x)==2)) { if (transpose==TRUE) { x=t(x) } if (dim(x)[2]!=2){stop("Cochran-Armitage test for trend must be used with a (R,2) table. Use transpose argument",call.=FALSE) } nidot=apply(x,1,sum) n=sum(nidot) Ri=scores(x,1,"table") Rbar=sum(nidot*Ri)/n s2=sum(nidot*(Ri-Rbar)^2) pdot1=sum(x[,1])/n T=sum(x[,1]*(Ri-Rbar))/sqrt(pdot1*(1-pdot1)*s2) p.value.uni=1-pnorm(abs(T)) p.value.bi=2*p.value.uni out=list(estimate=T,dim=dim(x),p.value.uni=p.value.uni,p.value.bi=p.valu e.bi,name="Cochran-Armitage test for trend") return(out) } else {stop("Cochran-Armitage test for trend must be used with a (2,C) or a (R,2) table",call.=FALSE) } } Eric Lecoutre UCL / Institut de Statistique Voie du Roman Pays, 20 1348 Louvain-la-Neuve Belgium tel: (+32)(0)10473050 [EMAIL PROTECTED] http://www.stat.ucl.ac.be/ISpersonnel/lecoutre If the statistics are boring, then you've got the wrong numbers. -Edward Tufte > -----Original Message----- > From: [EMAIL PROTECTED] > [mailto:[EMAIL PROTECTED] On Behalf Of Vito Ricci > Sent: jeudi 28 juillet 2005 16:30 > To: r-help@stat.math.ethz.ch > Cc: [EMAIL PROTECTED] > Subject: Re: [R] Cochran-Armitage-trend-test > > > Hi, > see: > http://finzi.psych.upenn.edu/R/Rhelp02a/archive/20396.html > > Regards, > Vito > > > > [EMAIL PROTECTED] wrote: > > Hi! > > I am searching for the Cochran-Armitage-trend-test. Is > it included in an > R-package? > > Thank you! > > > > Diventare costruttori di soluzioni > Became solutions' constructors > > "The business of the statistician is to catalyze > the scientific learning process." > George E. P. Box > > "Statistical thinking will one day be as necessary for > efficient citizenship as the ability to read and write" H. G. Wells > > Top 10 reasons to become a Statistician > > 1. Deviation is considered normal > 2. We feel complete and sufficient > 3. We are 'mean' lovers > 4. Statisticians do it discretely and continuously > 5. We are right 95% of the time > 6. We can legally comment on someone's posterior distribution > 7. We may not be normal, but we are transformable > 8. We never have to say we are certain > 9. We are honestly significantly different > 10. No one wants our jobs > > > Visitate il portale http://www.modugno.it/ > e in particolare la sezione su Palese > http://www.modugno.it/archivio/palesesanto_spirito/ > > ______________________________________________ > R-help@stat.math.ethz.ch mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide! > http://www.R-project.org/posting-guide.html > ______________________________________________ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html