I am simulating an ANOVA model and get a strange behavior from the summary function. To be more specific: please run the following code and see for yourself: the summary()[["r.squared"]] values of two identical models are quite different!!
## 3 x 3 ANOVA of two factors x and z on outcome y s.size <- 300 # the sample size p.z <- c(0.25, 0.5, 0.25) # the probabilities of factor z ## probabilities of x given z p.x.z <- matrix(c(40/60, 20/120, 6/60, 14/60, 80/120, 14/60, 6/60, 20/120, 40/60), 3, 3, byrow = TRUE) ## the regression coefficients according to the model.matrix X, that ## is computed later beta <- c(140, -60, -50, -80, 120, 100, -40, 60, 50)/40 ## the factor z and the factor x (in dependence of z) z <- x <- vector(mode = "integer", length = s.size) for(j in 1:s.size) { z[j] <- sample(1:3, 1, prob = p.z) x[j] <- sample(1:3, 1, prob = p.x.z[, z[j]]) } ## constructing the model.matrix X zf <- factor(z) contrasts(zf) <- contr.treatment(nlevels(zf), base = 3) zm <- model.matrix(~ zf) xf <- factor(x) contrasts(xf) <- contr.treatment(nlevels(xf), base = 3) xm <- model.matrix(~ xf) X <- cbind(zm, zm * xm[,2], zm * xm[,3]) ## the outcome y y <- X %*% beta + rnorm(s.size, 0, 4) ## the two linear models lm.v1 <- lm(y ~ X -1) lm.v2 <- lm(y ~ zf * xf) ## which are equivalent anova(lm.v1, lm.v2) print(sd(model.matrix(lm.v1) %*% coef(lm.v1))^2 / sd(y)^2) - print(sd(model.matrix(lm.v2) %*% coef(lm.v2))^2 / sd(y)^2) ## so far everything is fine but why are the following r.squared ## values quite different? print(summary(lm.v1)[["r.squared"]]) - print(summary(lm.v2)[["r.squared"]]) ______________________________________________ R-help@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html