On Thu, 2005-06-23 at 23:12 +0800, ronggui wrote: > i have a data frame(dat) which has many variables.and i use the > following script to get the crosstable. > > >danx2<-c("x1.1","x1.2","x1.3","x1.4","x1.5","x2","x4","x5","x6","x7","x8.1","x8.2","x8.3","x8.4","x11", > "x13","x17","x19","x20","x21") > >indep<-c("x23","x24","x25","x26","x27","x28.1","x28.2","x29") > >for (k in indep){ > for (i in danx2){ > a<-chisq.test(dat[,i],dat[,k])$p.v<=0.05 > if (a) > {CrossTable(dat[,i],dat[,k],chisq=T,format="SPSS");cat(rep("=",50),"\n","\n")} > } > > it has a little pitfall:the dimnames of table is dat[,i] and > dat[,k],but i want it to be like x2,x23... > is there any good way to do this? > and in the command CrossTable(dat[,i],dat[,k],chisq=T,format="SPSS") > in the loop,is there any other way to get the variable other than > dat[,i] and dat[,k]? > thank you !
Hi, I am in between meetings here. Sorry for the delay in my reply to your query. The best solution is for me to add two new args to CrossTable() to allow you to specify these names explicitly, rather than having them as the way they are now, which simply takes the x and y args and does: RowData <- deparse(substitute(x)) ColData <- deparse(substitute(y)) The result is that whatever is passed as the x and y arguments, will be used as the titles for the row and column labels as you have noted. In the mean time, I am attaching an update to CrossTable (which I have not extensively tested yet), that you can source() into R via the console. The update has two new args called "RowData" and "ColData" which will default to NULL, so as to not impact current default behavior. You can then set these as part of your loop by passing the index values. Using one of the examples in ?CrossTable: > CrossTable(infert$education, infert$induced, RowData = "Education", ColData = "Induced") Cell Contents |-------------------------| | N | | Chi-square contribution | | N / Row Total | | N / Col Total | | N / Table Total | |-------------------------| Total Observations in Table: 248 | Induced Education | 0 | 1 | 2 | Row Total | -------------|-----------|-----------|-----------|-----------| 0-5yrs | 4 | 2 | 6 | 12 | | 1.232 | 0.506 | 9.898 | | | 0.333 | 0.167 | 0.500 | 0.048 | | 0.028 | 0.029 | 0.162 | | | 0.016 | 0.008 | 0.024 | | -------------|-----------|-----------|-----------|-----------| 6-11yrs | 78 | 27 | 15 | 120 | | 1.121 | 1.059 | 0.471 | | | 0.650 | 0.225 | 0.125 | 0.484 | | 0.545 | 0.397 | 0.405 | | | 0.315 | 0.109 | 0.060 | | -------------|-----------|-----------|-----------|-----------| 12+ yrs | 61 | 39 | 16 | 116 | | 0.518 | 1.627 | 0.099 | | | 0.526 | 0.336 | 0.138 | 0.468 | | 0.427 | 0.574 | 0.432 | | | 0.246 | 0.157 | 0.065 | | -------------|-----------|-----------|-----------|-----------| Column Total | 143 | 68 | 37 | 248 | | 0.577 | 0.274 | 0.149 | | -------------|-----------|-----------|-----------|-----------| Let me know if this works or you find a problem. I will do further testing here as soon as time permits and get an update to Greg and Nitin to include into gregmisc. HTH, Marc Schwartz
CrossTable <- function (x, y, digits = 3, max.width = 5, expected = FALSE, prop.r = TRUE, prop.c = TRUE, prop.t = TRUE, prop.chisq=TRUE, chisq = FALSE, fisher = FALSE, mcnemar = FALSE, resid = FALSE, sresid = FALSE, asresid = FALSE, missing.include = FALSE, format=c("SAS","SPSS"), RowData=NULL, ColData=NULL, ... ) { format=match.arg(format) ## Ensure that max.width >= 1 if (max.width < 1) stop("max.width must be >= 1") ## Set 'x' vector flag vector.x <- FALSE ## Ensure that if (expected), a chisq is done if (expected) chisq <- TRUE if (missing(y)) { ## is x a vector? if (is.null(dim(x))) { #TotalN <- length(x) if (missing.include) x <- factor(x,exclude=NULL) else ## Remove any unused factor levels x <- factor(x) t <- t(as.matrix(table(x))) vector.x <- TRUE } ## is x a matrix? else if (length(dim(x) == 2)) { if(any(x < 0) || any(is.na(x))) stop("all entries of x must be nonnegative and finite") ## Add generic dimnames if required ## check each dimname separately, in case user has defined one or the other if (is.null(rownames(x))) rownames(x) <- paste("[", 1:nrow(x), ",]", sep = "") if (is.null(colnames(x))) colnames(x) <- paste("[,", 1:ncol(x), "]", sep = "") t <- x } else stop("x must be either a vector or a 2 dimensional matrix, if y is not given") } else { if(length(x) != length(y)) stop("x and y must have the same length") ## Create Titles for Table From Vector Names if(is.null(RowData)) RowData <- deparse(substitute(x)) if (is.null(ColData)) ColData <- deparse(substitute(y)) if (missing.include) { x <- factor(x,exclude=c()) y <- factor(y,exclude=c()) } else { ## Remove unused factor levels from vectors x <- factor(x) y <- factor(y) } ## Generate table t <- table(x, y) } ## if t is not at least a 2 x 2, do not do stats ## even if any set to TRUE. Do not do col/table props if (any(dim(t) < 2)) { prop.c <- prop.r <- prop.chisq <- chisq <- expected <- fisher <- mcnemar <- FALSE } ## Generate cell proportion of row CPR <- prop.table(t, 1) ## Generate cell proportion of col CPC <- prop.table(t, 2) ## Generate cell proportion of total CPT <- prop.table(t) ## Generate summary counts GT <- sum(t) RS <- rowSums(t) CS <- colSums(t) if (length(dim(x) == 2)) TotalN <- GT else TotalN <- length(x) ## Column and Row Total Headings ColTotal <- "Column Total" RowTotal <- "Row Total" ## Set consistent column widths based upon dimnames and table values CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]), nchar(RS), nchar(CS), nchar(RowTotal))) RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal))) ## Adjust first column width if Data Titles present if (exists("RowData")) RWidth <- max(RWidth, nchar(RowData)) ## Create row separators RowSep <- paste(rep("-", CWidth + 2), collapse = "") RowSep1 <- paste(rep("-", RWidth + 1), collapse = "") SpaceSep1 <- paste(rep(" ", RWidth), collapse = "") SpaceSep2 <- paste(rep(" ", CWidth), collapse = "") ## Create formatted Names FirstCol <- formatC(dimnames(t)[[1]], width = RWidth, format = "s") ColTotal <- formatC(ColTotal, width = RWidth, format = "s") RowTotal <- formatC(RowTotal, width = CWidth, format = "s") ## Perform Chi-Square Tests ## Needs to be before the table output, in case (expected = TRUE) if (chisq) { if (all(dim(t) == 2)) CSTc <- chisq.test(t, correct = TRUE, ...) CST <- chisq.test(t, correct = FALSE, ...) } else CST <- suppressWarnings(chisq.test(t, correct = FALSE)) if (asresid & !vector.x) ASR <- (CST$observed-CST$expected)/sqrt(CST$expected*((1-RS/GT) %*% t(1-CS/GT))) print.CrossTable.SAS <- function() { if (exists("RowData")) { cat(SpaceSep1, "|", ColData, "\n") cat(formatC(RowData, width = RWidth, format= "s"), formatC(dimnames(t)[[2]], width = CWidth, format = "s"), RowTotal, sep = " | ", collapse = "\n") } else cat(SpaceSep1, formatC(dimnames(t)[[2]], width = CWidth, format = "s"), RowTotal, sep = " | ", collapse = "\n") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") ## Print table cells for (i in 1:nrow(t)) { cat(FirstCol[i], formatC(c(t[i, ], RS[i]), width = CWidth, format = "d"), sep = " | ", collapse = "\n") if (expected) cat(SpaceSep1, formatC(CST$expected[i, ], digits = digits, format = "f", width = CWidth), SpaceSep2, sep = " | ", collapse = "\n") if (prop.chisq) cat(SpaceSep1, formatC((((CST$expected[i, ]-t[i, ])^2)/CST$expected[i, ]), width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") if (prop.r) cat(SpaceSep1, formatC(c(CPR[i, ], RS[i]/GT), width = CWidth, digits = digits, format = "f"), sep = " | ", collapse = "\n") if (prop.c) cat(SpaceSep1, formatC(CPC[i, ], width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") if (prop.t) cat(SpaceSep1, formatC(CPT[i, ], width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") } ## Print Column Totals cat(ColTotal, formatC(c(CS, GT), width = CWidth, format = "d"), sep = " | ", collapse = "\n") if (prop.c) cat(SpaceSep1, formatC(CS/GT, width = CWidth, digits = digits, format = "f"), SpaceSep2, sep = " | ", collapse = "\n") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") } ## End Of print.Crosstable.SAS function print.CrossTable.SPSS <- function() { ## similar to SPSS behaviour ## Print Column headings if (exists("RowData")) { cat(SpaceSep1, "|", ColData, "\n") cat(cat(formatC(RowData, width = RWidth, format = "s"),sep=" | ", collapse=""), cat(formatC(dimnames(t)[[2]], width = CWidth-1, format = "s"), sep=" | ", collapse=""), cat(RowTotal, sep = " | ", collapse = "\n"), sep="", collapse="") } else cat(SpaceSep1, formatC(dimnames(t)[[2]], width = CWidth, format = "s"), RowTotal, sep = " | ", collapse = "\n") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") ## Print table cells for (i in 1:nrow(t)) { cat(cat(FirstCol[i], sep=" | ", collapse=""), cat(formatC(c(t[i, ], RS[i]), width = CWidth-1, format = "d"), sep = " | ", collapse = "\n"), sep="", collapse="") if (expected) cat(cat(SpaceSep1, sep=" | ", collapse=""), cat(formatC(CST$expected[i, ], digits = digits, format = "f", width = CWidth-1), sep=" | ", collapse=""), cat(SpaceSep2, sep = " | ", collapse = "\n"), sep="", collapse="") if (prop.chisq) cat(cat(SpaceSep1, sep=" | ", collapse=""), cat(formatC((((CST$expected[i, ]-t[i, ])^2)/CST$expected[i, ]), digits = digits, format = "f", width = CWidth-1), sep=" | ", collapse=""), cat(SpaceSep2, sep = " | ", collapse = "\n"), sep="", collapse="") if (prop.r) cat(cat(SpaceSep1, sep=" | ", collapse=""), cat(formatC(c(CPR[i, ]*100, 100*RS[i] / GT), width = CWidth-1, digits = digits, format = "f"), sep = "% | ", collapse = "\n"), sep="", collapse="") if (prop.c) cat(cat(SpaceSep1, sep=" | ", collapse=""), cat(formatC(CPC[i, ]*100, width = CWidth-1, digits = digits, format = "f"), sep="% | ", collapse=""), cat(SpaceSep2, sep = " | ", collapse = "\n"), sep="", collapse="") if (prop.t) cat(cat(SpaceSep1, sep=" | ", collapse=""), cat(formatC(CPT[i, ]*100, width = CWidth-1, digits = digits, format = "f"), sep="% | ", collapse=""), cat(SpaceSep2, sep = " | ", collapse = "\n"), sep="", collapse="") if (resid) cat(cat(SpaceSep1,sep=" | ",collapse = ""), cat(formatC(CST$observed[i, ]-CST$expected[i, ], digits = digits, format = "f", width = CWidth-1), sep = " | ", collapse = ""), cat(SpaceSep2,sep = " | ", collapse = "\n"),sep="",collapse="") if (sresid) cat(cat(SpaceSep1,sep=" | ",collapse = ""), cat(formatC(CST$residual[i, ], digits = digits, format = "f", width = CWidth-1), sep = " | ", collapse = ""), cat(SpaceSep2,sep = " | ", collapse = "\n"),sep="",collapse="") if (asresid) cat(cat(SpaceSep1,sep=" | ",collapse = ""), cat(formatC(ASR[i, ], digits = digits, format = "f", width = CWidth-1), sep = " | ", collapse = ""), cat(SpaceSep2,sep = " | ", collapse = "\n"),sep="",collapse="") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") } ## Print Column Totals cat(cat(ColTotal,sep=" | ",collapse=""), cat(formatC(c(CS, GT), width = CWidth-1, format = "d"), sep = " | ", collapse = "\n"),sep="",collapse="") if (prop.c) cat(cat(SpaceSep1,sep=" | ",collapse=""), cat(formatC(100*CS/GT, width = CWidth-1, digits = digits, format = "f"),sep = "% | ", collapse = ""), cat(SpaceSep2,sep = " | ", collapse = "\n"),sep="",collapes="") cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n") } ## End of print.CrossTable.SPSS function ## Print Function For 1 X N Vector In SAS Format print.CrossTable.vector.SAS <- function() { if (length(t) > max.width) { ## set breakpoints for output based upon max.width final.row <- length(t) %% max.width max <- length(t) - final.row ## Define breakpoint indices for each row start <- seq(1, max, max.width) end <- start + (max.width - 1) ## Add final.row if required if (final.row > 0) { start <- c(start, end[length(end)] + 1) end <- c(end, end[length(end)] + final.row) } } else { ## Each value printed horizontally in a single row start <- 1 end <- length(t) } SpaceSep3 <- paste(SpaceSep2, " ", sep = "") for (i in 1:length(start)) { ## print column labels cat(SpaceSep2, formatC(dimnames(t)[[2]][start[i]:end[i]], width = CWidth, format = "s"), sep = " | ", collapse = "\n") cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) + 1), sep = "|", collapse = "\n") cat(SpaceSep2, formatC(t[, start[i]:end[i]], width = CWidth, format = "d"), sep = " | ", collapse = "\n") cat(SpaceSep2, formatC(CPT[, start[i]:end[i]], width = CWidth, digits = digits, format = "f"), sep = " | ", collapse = "\n") cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) + 1), sep = "|", collapse = "\n") cat("\n\n") } } ## End of print.Crosstable.vector.SAS function ## Print function for 1 X N vector in SPSS format print.CrossTable.vector.SPSS <- function() { if (length(t) > max.width) { ## set breakpoints for output based upon max.width final.row <- length(t) %% max.width max <- length(t) - final.row ## Define breakpoint indices for each row start <- seq(1, max, max.width) end <- start + (max.width - 1) ## Add final.row if required if (final.row > 0) { start <- c(start, end[length(end)] + 1) end <- c(end, end[length(end)] + final.row) } } else { ## Each value printed horizontally in a single row start <- 1 end <- length(t) } SpaceSep3 <- paste(SpaceSep2, " ", sep = "") for (i in 1:length(start)) { cat(cat(SpaceSep2,sep=" | ",collapse=""), cat(formatC(dimnames(t)[[2]][start[i]:end[i]], width = CWidth-1, format = "s"), sep = " | ", collapse = "\n"), sep="",collapse="") cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) + 1), sep = "|", collapse = "\n") cat(cat(SpaceSep2,sep=" | ",collapse=""), cat(formatC(t[, start[i]:end[i]], width = CWidth-1, format = "d"), sep = " | ", collapse = "\n"), sep="",collapse="") cat(cat(SpaceSep2, sep=" | ",collapse=""), cat(formatC(CPT[, start[i]:end[i]], width = CWidth-1, digits = digits, format = "f"), sep = "% | ", collapse = ""),sep="",collapse="\n") cat(SpaceSep3, rep(RowSep, (end[i] - start[i]) + 1), sep = "|", collapse = "\n") } ## End of for (i in 1:length(start)) if (GT < TotalN) cat("\nNumber of Missing Observations: ",TotalN-GT," (",100*(TotalN-GT)/TotalN,"%)\n",sep="") } ## End of print.CrossTable.vector.SPSS Function print.statistics <- function() { ## Print Statistics if (chisq) { cat(rep("\n", 2)) cat("Statistics for All Table Factors\n\n\n") cat(CST$method,"\n") cat("------------------------------------------------------------\n") cat("Chi^2 = ", CST$statistic, " d.f. = ", CST$parameter, " p = ", CST$p.value, "\n\n") if (all(dim(t) == 2)) { cat(CSTc$method,"\n") cat("------------------------------------------------------------\n") cat("Chi^2 = ", CSTc$statistic, " d.f. = ", CSTc$parameter, " p = ", CSTc$p.value, "\n") } } ## Perform McNemar tests if (mcnemar) { McN <- mcnemar.test(t, correct = FALSE) cat(rep("\n", 2)) cat(McN$method,"\n") cat("------------------------------------------------------------\n") cat("Chi^2 = ", McN$statistic, " d.f. = ", McN$parameter, " p = ", McN$p.value, "\n\n") if (all(dim(t) == 2)) { McNc <- mcnemar.test(t, correct = TRUE) cat(McNc$method,"\n") cat("------------------------------------------------------------\n") cat("Chi^2 = ", McNc$statistic, " d.f. = ", McNc$parameter, " p = ", McNc$p.value, "\n") } } ## Perform Fisher Tests if (fisher) { cat(rep("\n", 2)) FTt <- fisher.test(t, alternative = "two.sided") if (all(dim(t) == 2)) { FTl <- fisher.test(t, alternative = "less") FTg <- fisher.test(t, alternative = "greater") } cat("Fisher's Exact Test for Count Data\n") cat("------------------------------------------------------------\n") if (all(dim(t) == 2)) { cat("Sample estimate odds ratio: ", FTt$estimate, "\n\n") cat("Alternative hypothesis: true odds ratio is not equal to 1\n") cat("p = ", FTt$p.value, "\n") cat("95% confidence interval: ", FTt$conf.int, "\n\n") cat("Alternative hypothesis: true odds ratio is less than 1\n") cat("p = ", FTl$p.value, "\n") cat("95% confidence interval: ", FTl$conf.int, "\n\n") cat("Alternative hypothesis: true odds ratio is greater than 1\n") cat("p = ", FTg$p.value, "\n") cat("95% confidence interval: ", FTg$conf.int, "\n\n") } else { cat("Alternative hypothesis: two.sided\n") cat("p = ", FTt$p.value, "\n") } } ## End Of If(Fisher) Loop cat(rep("\n", 2)) ## Create list of results for invisible() CT <- list(t = t, prop.row = CPR, prop.col = CPC, prop.tbl = CPT) if (any(chisq, fisher, mcnemar)) { if (all(dim(t) == 2)) { if (chisq) CT <- c(CT, list(chisq = CST, chisq.corr = CSTc)) if (fisher) CT <- c(CT, list(fisher.ts = FTt, fisher.tl = FTl, fisher.gt = FTg)) if (mcnemar) CT <- c(CT, list(mcnemar = McN, mcnemar.corr = McNc)) } else { if (chisq) CT <- c(CT, list(chisq = CST)) if (fisher) CT <- c(CT, list(fisher.ts = FTt)) if (mcnemar) CT <- c(CT, list(mcnemar = McN)) } } ## End of if(any(chisq, fisher, mcnemar)) loop ## return list(CT) invisible(CT) } ## End of print.statistics function ## Printing the tables if (format=="SAS") { ## Print Cell Layout cat(rep("\n", 2)) cat(" Cell Contents\n") cat("|-------------------------|\n") cat("| N |\n") if (expected) cat("| Expected N |\n") if (prop.chisq) cat("| Chi-square contribution |\n") if (prop.r) cat("| N / Row Total |\n") if (prop.c) cat("| N / Col Total |\n") if (prop.t) cat("| N / Table Total |\n") cat("|-------------------------|\n") cat(rep("\n", 2)) cat("Total Observations in Table: ", GT, "\n") cat(rep("\n", 2)) if (!vector.x) print.CrossTable.SAS() else print.CrossTable.vector.SAS() print.statistics() } else if (format == "SPSS") { ## Print Cell Layout cat("\n") cat(" Cell Contents\n") cat("|-------------------------|\n") cat("| Count |\n") if (!vector.x) { if (expected) cat("| Expected Values |\n") if (prop.chisq) cat("| Chi-square contribution |\n") if (prop.r) cat("| Row Percent |\n") if (prop.c) cat("| Column Percent |\n") if (prop.t) cat("| Total Percent |\n") if (resid) cat("| Residual |\n") if (sresid) cat("| Std Residual |\n") if (asresid) cat("| Adj Std Resid |\n") } else cat("| Row Percent |\n") cat("|-------------------------|\n") cat("\n") cat("Total Observations in Table: ", GT, "\n") cat("\n") if (!vector.x) print.CrossTable.SPSS() else print.CrossTable.vector.SPSS() print.statistics() if (any(dim(t) >= 2) & any(chisq,mcnemar,fisher)) { MinExpF = min(CST$expected) cat(' Minimum expected frequency:',MinExpF,"\n") NMinExpF = length(CST$expected[which(CST$expected<5)]) if (NMinExpF > 0) { NCells = length(CST$expected) cat('Cells with Expected Frequency < 5: ',NMinExpF,' of ',NCells," (",100*NMinExpF/NCells,"%)\n",sep="") } cat("\n") } ## End of if (any(dim(t)... } ## End of if(format=="SPSS") loop else stop("unknown format") } ## End of the main function Crosstable.R
______________________________________________ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html