Hello Hadley, Thank you very much for your help! I have just received your book btw :)
On May 16, 2010, at 6:16 PM, Hadley Wickham wrote: >Hi Giovanni, > >Have a look at the classifly package for an alternative approach that >works for all classification algorithms. If you provided a small >reproducible example, I could step through it for you. > >Hadley Please find below a self contained example. I managed to complete the task using the graphics package. I would be curious to see how to get one of those really nice ggplot2 graphs with decision boundaries and class regions :) Thank you! Best regards, Giovanni # ========================================================================================= # (1) Generate sample labelled data # ========================================================================================= rm(list=ls()) # clear workspace library(mvtnorm) # needed for rmvnorm set.seed(11) # predictability of results sigma <- cbind(c(0.5, 0.3), c(0.3, 0.5)) # true covariance matrix mu <- matrix(0,nrow=3,ncol=2) mu[1,] <- c(3, 1.5) # true mean vectors mu[2,] <- c(4, 4) mu[3,] <- c(8.5, 2) x <- matrix(0, nrow = 300, ncol = 3) x[,3] <- rep(1:3, each = 100) # class labels x[1 :100,1:2] <- rmvnorm(n = 100, mean = mu[1,], sigma = sigma) # simulate data x[101:200,1:2] <- rmvnorm(n = 100, mean = mu[2,], sigma = sigma) x[201:300,1:2] <- rmvnorm(n = 100, mean = mu[3,], sigma = sigma) # ========================================================================================= # (2) Plot the labelled data # ========================================================================================= ## ## Function for plotting the data separated by classes, hacked out of predplot: ## http://stat.ethz.ch/teaching/lectures/FS_2010/CompStat/predplot.R ## plotclasses <- function(x, main = "", len = 200, ...) { xp <- seq(min(x[,1]), max(x[,1]), length=len) yp <- seq(min(x[,2]), max(x[,2]), length=len) grid <- expand.grid(xp, yp) colnames(grid) <- colnames(x)[-3] plot(x[,1],x[,2],col=x[,3],pch=x[,3],main=main,xlab='x_1',ylab='x_2') text(2.5,4.8,"Class 1",cex=.8) # class 1 text(4.2,1.0,"Class 2",cex=.8) # class 2 text(8.0,0.5,"Class 3",cex=.8) # class 3 } plotclasses(x) # ========================================================================================= # (3) Functions needed: calculate separating hyperplane between two given # classes and converting hyperplanes to line equations for the p=2 case # ========================================================================================= ## ## Returns the coefficients for the hyperplane that separates one class from another. ## Computes the coefficients according to the formula: ## $x^T\hat{\Sigma}^{-1}(\hat{\mu}_0-\hat{\mu}_1) - \frac{1}{2}(\hat{\mu}_0 + ## \hat{\mu}_1)^T\hat{\Sigma}^{-1}(\hat{\mu}_0-\hat{\mu}_1)+\log(\frac{p_0}{p_1})$ ## ## sigmainv(DxD) - precalculated sigma (covariance matrix) inverse ## mu1(1xD) - precalculated mu mean for class 1 ## mu2(1xD) - precalculated mu mean for class 2 ## prior1 - precalculated prior probability for class 1 ## prior2 - precalculated prior probability for class 2 ## ownldahyperplane <- function(sigmainv,mu1,mu2,prior1,prior2) { J <- nrow(mu) # number of classes b <- sigmainv%*%(mu1 - mu2) c <- -(1/2)*t(mu1 + mu2)%*%sigmainv%*%(mu1 - mu2) + log(prior1/prior2) return(list(b=b,c=c)) } ## ## Returns linear betas (intersect and slopes) for the given hyperplane structure. ## The structure is a list that matches the output of the function defined above. ## ownlinearize <- function(sephyp) { return(list(beta0=-sephyp$c/sephyp$b[2], # line slope and intersect beta1=-sephyp$b[1]/sephyp$b[2])) } # ========================================================================================= # (4) Run lda # ========================================================================================= library(MASS) # needed for lda/qda # read in a function that plots # lda/qda decision boundaries fit <- lda(x=x[,1:2],grouping=x[,3]) A <- fit$scaling # extract A matrix sigmahatinv <- A%*%t(A) # calculate sigma hat inverse priorhat <- fit$prior # get prior hat probabilities muhat <- fit$means # get mu hat # ========================================================================================= # (5) Calculate the separating hyperplanes which can be added using abline # or create the class boundaries using lines by fixing six points. # Run the abline one at the time after running step 2 anew # ========================================================================================= plotclasses(x) # Step 5.a) sephyp12 <- ownldahyperplane(sigmahatinv,muhat[1,],muhat[2,], # calculate dec. boundary 1-2 priorhat[1],priorhat[2]) line12 <- ownlinearize(sephyp12) # get line for 1-2 abline(line12$beta0,line12$beta1) plotclasses(x) # Step 5.b) sephyp23 <- ownldahyperplane(sigmahatinv,muhat[2,],muhat[3,], # calculate dec. boundary 2-3 priorhat[2],priorhat[3]) line23 <- ownlinearize(sephyp23) # get line for 2-3 abline(line23$beta0,line23$beta1) plotclasses(x) # Step 5.c) sephyp13 <- ownldahyperplane(sigmahatinv,muhat[1,],muhat[3,], # calculate dec. boundary 1-3 priorhat[1],priorhat[3]) line13 <- ownlinearize(sephyp13) # get line for 1-3 abline(line13$beta0,line13$beta1) # ========================================================================================= # (6) Run this snippet to see the class regions after running step 2 anew # ========================================================================================= xvalue <- -(line13$beta0-line23$beta0)/(line13$beta1-line23$beta1) # intersect of any two decision yvalue <- line13$beta0 + line13$beta1*xvalue # boundaries center <- xy.coords(x=xvalue,y= yvalue) # manually calculated intersect # of two decision boundaries plotclasses(x) intersect <- xy.coords(x=0,y=line12$beta0) lines(x=c(intersect$x,center$x),y=c(intersect$y,center$y)) yvalue <- 6 xvalue <- (yvalue - line23$beta0)/line23$beta1 intersect <- xy.coords(xvalue,yvalue) # manually calculated intersect lines(x=c(center$x,intersect$x),y=c(center$y,intersect$y)) intersect <- xy.coords(x=0,y=line13$beta0) lines(x=c(intersect$x,center$x),y=c(intersect$y,center$y)) ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.