> The only thing that I found for R is by Gregor Gorjanc, but the > information seems to be dated: > > http://www.bfro.uni-lj.si/MR/ggorjan/software/R/index.html#tagCloud
Hi, Yes, I have tried to create a tag cloud plot in R, but I abandoned the project due to other things. The main obstacle was that in R we need to take care of the fontsizes and placement of words, while this is very easy with say browsers, who do all the renderind. I tracked the last version of the R file which is pasted bellow. I must say that I do not remember the status of the code so use it as you wish. If anyone wishes to take this project further, please do so! gg ### tagCloud.R ###------------------------------------------------------------------------ ### What: Tag cloud plot functions ### Time-stamp: <2006-09-10 02:53:29 ggorjan> ###------------------------------------------------------------------------ tagCloud <- function(x, n=100, decreasing=TRUE, threshold=NULL, fontsize=c(12, 36), align=TRUE, expandRow=TRUE, justRow="bottom", title, textGpar=gpar(col="navy"), rectGpar=gpar(col="white"), titleGpar=gpar(), viewGpar=gpar(), mar=c(1, 1, 1, 1)) { UseMethod("tagCloud") } tagCloud.default <- function(x, n=100, decreasing=TRUE, threshold=NULL, fontsize=c(12, 36), align=TRUE, expandRow=TRUE, justRow="bottom", title, textGpar=gpar(col="navy"), rectGpar=gpar(col="white"), titleGpar=gpar(), viewGpar=gpar(), mar=c(1, 1, 1, 1)) { if(!is.null(dim(x))) stop("'x' must be a vector") tagCloud.table(table(x), n=n, decreasing=decreasing, fontsize=fontsize, threshold=threshold, align=align, expandRow=expandRow, justRow=justRow, title=title, textGpar=textGpar, rectGpar=rectGpar, titleGpar=titleGpar, viewGpar=viewGpar, mar=mar) } tagCloud.table <- function(x, n=100, decreasing=TRUE, threshold=NULL, fontsize=c(12, 36), align=TRUE, expandRow=TRUE, justRow="bottom", title, textGpar=gpar(col="navy"), rectGpar=gpar(col="white"), titleGpar=gpar(), viewGpar=gpar(), mar=c(1, 1, 1, 1)) { ## --- Check --- if(length(dim(x)) != 1) stop("'x' must be one dimensional table") ## --- Threshold --- if(!is.null(threshold)) x <- x[x >= threshold] ## --- Number of units --- N <- length(x) ## length of table if(is.null(n)) { ## if n=NULL, plot all units n <- N } else { if(n > N) n <- N ## if n is to big, decrease it if(n < 1) n <- round(N * n) ## if n is percentage of units } fontsizeLength <- length(fontsize) if(fontsizeLength != 2) stop("'fontsize' must be of length two") ## --- Sort and subset --- if(n < N) { ## only if we want to plot subset of units tmp <- sort(x, decreasing=decreasing) x <- x[names(x) %in% names(tmp[1:n])] } ## --- Get relative freq --- x <- prop.table(x) ## --- Fontsize --- fontsizeDiff <- diff(fontsize) xDiff <- max(x) - min(x) if(xDiff != 0) { off <- ifelse(fontsizeDiff > 0, min(x), max(x)) fontsize <- (x - off) / xDiff * fontsizeDiff + min(fontsize) } else { ## all units have the same frequency fontsize <- rep(min(fontsize), times=n) } ## --- Viewport and rectangle --- grid.newpage() width <- unit(1, "npc") height <- unit(1, "npc") vp <- viewport(y=unit(mar[1], "lines"), x=unit(mar[2], "lines"), , width=width - unit(mar[2] + mar[4], "lines"), height=height - unit(mar[1] + mar[3], "lines"), just=c("left", "bottom"), gp=viewGpar, name="main") pushViewport(vp) if(!missing(title)) grid.text(title, y=height, gp=titleGpar, name="title") grid.rect(gp=rectGpar, name="cloud") ## --- Grobs --- tag <- vector(mode="list", length=4) names(tag) <- c("fontsize", "grob", "width", "height") tag[[1]] <- tag[[2]] <- tag[[3]] <- tag[[4]] <- vector(mode="list", length=n) for(i in 1:n) { tag$fontsize[[i]] <- fontsize[i] tag$grob[[i]] <- textGrob(names(x[i]), gp=gpar(fontsize=fontsize[i])) tag$width[[i]] <- convertWidth(grobWidth(tag$grob[[i]]), unitTo="npc", valueOnly=TRUE) tag$height[[i]] <- convertHeight(grobHeight(tag$grob[[i]]), unitTo="npc", valueOnly=TRUE) } ## --- Split lines --- row <- colWidth <- vector(length=n) row[1] <- 1 colWidth[1] <- 0 lineWidth <- tag$width[[1]] j <- 1 gapWidth <- convertWidth(stringWidth(" "), unitTo="npc", valueOnly=TRUE) maxWidth <- convertWidth(width, unitTo="npc", valueOnly=TRUE) for(i in 2:length(tag$width)) { test <- lineWidth + gapWidth + tag$width[[i]] if(test < maxWidth) { row[i] <- row[i - 1] colWidth[i] <- lineWidth + gapWidth lineWidth <- test j <- j + 1 } else { if(align) { ## Align units in previous row free <- maxWidth - lineWidth if(j == 1) { colWidth[i - 1] <- maxWidth / 2 - tag$width[[i - 1]] / 2 } else { gapWidthAlign <- free / j start <- i - (j - 1) end <- start + j - 2 colWidth[start:end] <- colWidth[start:end] + cumsum(rep(gapWidthAlign, times=(j - 1))) } } row[i] <- row[i - 1] + 1 lineWidth <- tag$width[[i]] colWidth[i] <- 0 j <- 1 } } rowHeight <- tapply(unlist(tag$height), list(row), max) ## --- Is there to many rows for given dimension of a rectangle --- sumRowHeight <- sum(rowHeight) heightNum <- convertWidth(height, unitTo="npc", valueOnly=TRUE) if(sumRowHeight > heightNum) { msg <- c("can not fit into defined dimension;", "adjust dimension, fontsize or number of units;", "keeping else constant, height should be at least", sumRowHeight) stop(cat(msg, fill=TRUE)) } else { if(expandRow) { ## increase height of row to fit nicely heightDiff <- heightNum - sumRowHeight heightDiff <- heightDiff / max(row) rowHeight <- rowHeight + heightDiff } } ## We have to plot from top to bottom and text should be in the bottom ## or center of the line rowHeightCenter<- ifelse(justRow == "bottom", 0, rowHeight / 2) rowHeight <- heightNum - (cumsum(rowHeight) - rowHeightCenter) rowHeight <- rowHeight[row] textGpar$fontsize <- unlist(tag$fontsize) grid.text(label=names(x), gp=textGpar, x=unit(colWidth, units="npc"), y=unit(rowHeight, units="npc"), just=c("left", justRow), name="tag") } ## getNames() ## grid.edit("tag", gp=gpar(col="red")) ###------------------------------------------------------------------------ ### tagCloud.R ends here ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.