hi I'm puzzled as to the relation between the REML score computed by gam and the formula (4) on p.4 here: http://opus.bath.ac.uk/22707/1/Wood_JRSSB_2011_73_1_3.pdf
I'm ok with this for poisson, or for quasipoisson when phi=1. However, when phi differs from 1, I'm stuck. #simulate some data library(mgcv) set.seed(1) x1<-runif(500) x2<-rnorm(500) linp<--0.5+x1+exp(-x2^2/2)*sin(4*x2) y<-rpois(500,exp(linp)) ##poisson #phi=1 m1<-gam(y~s(x1)+s(x2),family="poisson",method="REML") phi<-m1$scale #formula #1st term S1<-m1$smooth[[1]]$S[[1]]*m1$sp[1] S2<-m1$smooth[[2]]$S[[1]]*m1$sp[2] S<-matrix(0,19,19) for (i in 2:10) { for (j in 2:10) { S[i,j]=S1[i-1,j-1] S[i+9,j+9]=S2[i-1,j-1] } } beta<-m1$coef #penalised deviance Dp<-m1$dev+t(beta)%*%S%*%beta F1<-Dp/(2*phi) #2nd term F2<-sum(ifelse(y==0,0,y*log(y)-y-log(factorial(y)))) #3rd term X<-predict(m1,type="lpmatrix") W<-diag(fitted(m1)) H<-t(X)%*%W%*%X ldhs<-determinant(H+S,log=TRUE)$modulus[1] eigS<-eigen(S,only.values=TRUE)$val lds<-sum(log(eigS[1:16])) F3<-(ldhs-lds)/2 #4th term Mp=3 F4<-Mp/2*log(2*pi*phi) F1-F2+F3-F4 m1$gcv #reml score = formula ##quasipoisson with scale = 1 #fitting is identical to the poisson case #F1, F3, and F4 unchanged but F2 is now undefined m2<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=1) F1+F3-F4 m2$gcv #reml score = formula with F2 omitted ##quasipoisson with unknown scale m3<-gam(y~s(x1)+s(x2),family="quasipoisson",method="REML",scale=-1) phiq<-m3$scale #1st term S1q<-m3$smooth[[1]]$S[[1]]*m3$sp[1] S2q<-m3$smooth[[2]]$S[[1]]*m3$sp[2] Sq<-matrix(0,19,19) for (i in 2:10) { for (j in 2:10) { Sq[i,j]=S1q[i-1,j-1] Sq[i+9,j+9]=S2q[i-1,j-1] } } betaq<-m3$coef #penalised deviance Dpq<-m3$dev+t(betaq)%*%Sq%*%betaq F1q<-Dpq/(2*phiq) #2nd term undefined #3rd term Xq<-predict(m3,type="lpmatrix") Wq<-diag(fitted(m3)) Hq<-t(Xq)%*%Wq%*%Xq ldhsq<-determinant(Hq+Sq,log=TRUE)$modulus[1] eigSq<-eigen(Sq,only.values=TRUE)$val ldsq<-sum(log(eigSq[1:16])) F3q<-(ldhsq-ldsq)/2 #4th term Mp=3 F4q<-Mp/2*log(2*pi*phiq) F1q+F3q-F4q m3$gcv #quite different #but if phiq is replaced by the Pearson estimate of the scale P<-sum((y-fitted(m3))^2/fitted(m3)) phip<-P/(500-sum(m3$edf)) F1p<-Dpq/(2*phip) F3p<-F3q #third term independent of scale F4p<-Mp/2*log(2*pi*phip) F1p+F3p-F4p m3$gcv #closer but still wrong #is there a value of phi which makes this work? f<-function(t) (Dpq/(2*t) + F3q + Mp/2*log(2*pi*t) - m3$gcv)^2 optimise(f,interval=c(0.5,1.5))$min->phix Dpq/(2*phix) + F3q + Mp/2*log(2*pi*phix) m3$gcv #but what is phix - not the Pearson estimate or the scale returned by gam? thanks Greg Dropkin ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.