Hello list, I am not sure if the terminology that I am using here is widely used, however, I provide an example in the hopes that my problem will become clear. My basic problem is that I am unsure of how to 'constrain' my model estimates to reproduce the aggregate (by factor levels) observed dependent variable for a negative.binomial model. I realize this sounds rather vague, so I provide an example to illustrate:
When working with migration data, it is possible to model migration flows via a simple Poisson GLM like the following: > model1 <- glm(flows ~ destination.pop+distance+origin-0, data=migration, > family=poisson()) > model2 <- glm(flows ~ destination.pop+distance+origin-0, data=migration, > family=quasipoisson()) where origin is a factor of origins (1 - n). This has the effect of 'constraining' the predicted flows to reproduce the observed outgoing flows from each origin: > sums <- aggregate(migration$flows, by=list(migration$origin), sum) > sums1 <- aggregate(predict(model1, type='response'), > by=list(migration$origin), sum) > sums2 <- aggregate(predict(model2, type='response'), > by=list(migration$origin), sum) which works fine for both poisson and quasipoisson models. However, I would also like to fit a negative.binomial model to this data to (again) account for over-dispersion (which may still exist even after the constraint is imposed), however, the effect of the 'constraint' factor does not seem to work, due likely to the additional theta parameter. > model3 <- glm.nb(flows ~ destination.pop+distance+origin-0, data=migration) > sums3 <- aggregate(predict(model3,type='response'), > by=list(migration$origin), sum) > data.frame(origin=unique(migration$origin), observed=sums$x, > poisson=sums1$x,quasi=sums2$x,negbin=sums3$x) origin observed poisson quasi negbin 1 1 676308 676308 676308 651113.7 2 2 1155811 1155811 1155811 1141729.4 3 3 1789112 1789112 1789112 1845908.1 4 4 942162 942162 942162 978599.6 5 5 2484387 2484387 2484387 2486435.1 6 6 819222 819222 819222 747022.1 7 7 1237079 1237079 1237079 1405065.0 8 8 1067069 1067069 1067069 963339.5 9 9 2143172 2143172 2143172 2139171.5 Can anyone tell me how I might go about doing this for the negative.binomial model? Or perhaps better still, an alternative way of enforcing this 'constraint'? The following dataset is what I used for the above example (this data comes from Fotheringham and O’Kelly, 1989. Spatial interaction models: Formulations and applications. Dordrecht: Kluwer Academic Publishers): > dput(migration) structure(list(flows = c(283049L, 87267L, 29877L, 130830L, 21434L, 30287L, 21450L, 72114L, 180048L, 237229L, 60681L, 382565L, 53772L, 64645L, 43749L, 133122L, 79223L, 300345L, 286580L, 346407L, 287340L, 161645L, 97808L, 229764L, 26887L, 67280L, 281791L, 92308L, 49828L, 144980L, 113683L, 165405L, 198144L, 718673L, 551483L, 143860L, 316650L, 199466L, 89806L, 266305L, 17995L, 55094L, 230788L, 49892L, 252189L, 121366L, 25574L, 66324L, 35563L, 93434L, 178517L, 185618L, 192223L, 141679L, 158006L, 252039L, 30528L, 87987L, 172711L, 181868L, 89389L, 27409L, 134229L, 342948L, 110792L, 268458L, 394481L, 274629L, 279739L, 87938L, 289880L, 437255L), distance = c(219L, 1009L, 1514L, 974L, 1268L, 1795L, 2420L, 3174L, 219L, 831L, 1336L, 755L, 1049L, 1576L, 2242L, 2996L, 1009L, 831L, 505L, 1019L, 662L, 933L, 1451L, 2205L, 1514L, 1336L, 505L, 1370L, 888L, 654L, 946L, 1700L, 974L, 755L, 1019L, 1370L, 482L, 1144L, 2278L, 2862L, 1268L, 1049L, 662L, 888L, 484L, 662L, 1795L, 2380L, 1795L, 1576L, 933L, 654L, 1144L, 662L, 1278L, 1779L, 2420L, 2242L, 1451L, 946L, 2278L, 1795L, 1278L, 754L, 3174L, 2996L, 2205L, 1700L, 2862L, 2380L, 1779L, 754L), origin.pop = c(16.2876696349298, 16.2876696349298, 16.2876696349298, 16.2876696349298, 16.2876696349298, 16.2876696349298, 16.2876696349298, 16.2876696349298, 17.4279408399148, 17.4279408399148, 17.4279408399148, 17.4279408399148, 17.4279408399148, 17.4279408399148, 17.4279408399148, 17.4279408399148, 17.5110179983684, 17.5110179983684, 17.5110179983684, 17.5110179983684, 17.5110179983684, 17.5110179983684, 17.5110179983684, 17.5110179983684, 16.6083307371083, 16.6083307371083, 16.6083307371083, 16.6083307371083, 16.6083307371083, 16.6083307371083, 16.6083307371083, 16.6083307371083, 17.2140377110705, 17.2140377110705, 17.2140377110705, 17.2140377110705, 17.2140377110705, 17.2140377110705, 17.2140377110705, 17.2140377110705, 16.3878173980331, 16.3878173980331, 16.3878173980331, 16.3878173980331, 16.3878173980331, 16.3878173980331, 16.3878173980331, 16.3878173980331, 16.761264461712, 16.761264461712, 16.761264461712, 16.761264461712, 16.761264461712, 16.761264461712, 16.761264461712, 16.761264461712, 15.9304398925737, 15.9304398925737, 15.9304398925737, 15.9304398925737, 15.9304398925737, 15.9304398925737, 15.9304398925737, 15.9304398925737, 17.0532473904734, 17.0532473904734, 17.0532473904734, 17.0532473904734, 17.0532473904734, 17.0532473904734, 17.0532473904734, 17.0532473904734), destination.pop = c(17.4279408399148, 17.5110179983684, 16.6083307371083, 17.2140377110705, 16.3878173980331, 16.761264461712, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.5110179983684, 16.6083307371083, 17.2140377110705, 16.3878173980331, 16.761264461712, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.4279408399148, 16.6083307371083, 17.2140377110705, 16.3878173980331, 16.761264461712, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.4279408399148, 17.5110179983684, 17.2140377110705, 16.3878173980331, 16.761264461712, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.4279408399148, 17.5110179983684, 16.6083307371083, 16.3878173980331, 16.761264461712, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.4279408399148, 17.5110179983684, 16.6083307371083, 17.2140377110705, 16.761264461712, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.4279408399148, 17.5110179983684, 16.6083307371083, 17.2140377110705, 16.3878173980331, 15.9304398925737, 17.0532473904734, 16.2876696349298, 17.4279408399148, 17.5110179983684, 16.6083307371083, 17.2140377110705, 16.3878173980331, 16.761264461712, 17.0532473904734, 16.2876696349298, 17.4279408399148, 17.5110179983684, 16.6083307371083, 17.2140377110705, 16.3878173980331, 16.761264461712, 15.9304398925737), destination = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9"), class = "factor"), origin = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9"), class = "factor")), .Names = c("flows", "distance", "origin.pop", "destination.pop", "destination", "origin" ), row.names = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L), class = "data.frame") Regards, Carson -- Carson J. Q. Farmer ISSP Doctoral Fellow National Centre for Geocomputation National University of Ireland, Maynooth, http://www.carsonfarmer.com/ ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.