# Here's a couple of similar plots created with ggplot2. I chose to
# turn the data into a data frame with an explicit date column. Using a
# log scale somewhat stabilises the variability.
#
# Script outline:
#   1. Define the three year-by-month traffic tables (`sas`, `s`, `r`).
#   2. Melt each table to long form (year, month, count), dropping NA cells.
#   3. Stack them into `all`, tagged by `software`, and add a mid-month `date`.
#   4. Plot the monthly series (linear and log scale) and the yearly totals.

library(reshape)  # melt()
library(ggplot2)  # qplot(), geom_smooth(), scale_y_log10(), last_plot()

## SAS-L traffic
## Data frame: one row per year (1993-2009), one column per month.
sas <- structure(list(
  Jan = c(NA, 546L, 548L, 853L, 1007L, 894L, 514L,
    1720L, 1826L, 1941L, 1832L, 1636L, 2122L, 2722L, 2750L, 2305L,
    357L),
  Feb = c(NA, 511L, 734L, 1024L, 1150L, 1068L, 493L, 1519L,
    1537L, 1845L, 1846L, 1652L, 1960L, 1645L, 926L, 2255L, NA),
  Mar = c(NA,
    658L, 963L, 805L, 1108L, 945L, 659L, 1177L, 1915L, 2010L, 1755L,
    2188L, 629L, 1711L, 1728L, 2712L, NA),
  Apr = c(NA, 681L, 792L,
    1052L, 1315L, 784L, 1077L, 1163L, 1467L, 2199L, 1757L, 1826L,
    2169L, 2796L, 2766L, 2789L, NA),
  May = c(NA, 712L, 945L, 1163L,
    1212L, 448L, 778L, 1963L, 1735L, 2373L, 1863L, 1836L, 2283L,
    3147L, 2974L, 2025L, NA),
  Jun = c(NA, 751L, 1002L, 999L, 1127L,
    813L, 540L, 1615L, 1905L, 2133L, 1701L, 2606L, 2407L, 2723L,
    2691L, 2368L, NA),
  Jul = c(15L, 763L, 775L, 1184L, 1074L, 896L,
    476L, 1572L, 2027L, 2445L, 1926L, 1843L, 2061L, 761L, 2435L,
    2607L, NA),
  Aug = c(458L, 975L, 969L, 1053L, 692L, 823L, 612L,
    1696L, 1976L, 1492L, 1689L, 2143L, 1793L, 2027L, 2592L, 2584L,
    NA),
  Sep = c(330L, 703L, 745L, 1176L, 947L, 894L, 1351L, 1491L,
    1439L, 1864L, 1646L, 1784L, 1365L, 2714L, 1868L, 2554L, NA),
  Oct = c(219L, 805L, 691L, 1197L, 900L, 1129L, 1708L, 1669L,
    1592L, 2133L, 1832L, 1712L, 1427L, 2983L, 2320L, 2434L, NA),
  Nov = c(472L, 752L, 773L, 911L, 853L, 733L, 1720L, 1490L,
    1636L, 1663L, 1545L, 1786L, 1518L, 2848L, 2112L, 1984L, NA),
  Dec = c(517L, 666L, 765L, 844L, 677L, 492L, 1595L, 1298L,
    1424L, 1520L, 1445L, 2148L, 1524L, 2374L, 1948L, 1921L, NA)
), .Names = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), class = "data.frame",
row.names = c("1993",
  "1994", "1995", "1996", "1997", "1998", "1999", "2000", "2001",
  "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009"
))

## s-news traffic
## 12 x 12 matrix filled column-wise: rows are years 1998-2009, columns months.
s <- structure(c(NA, 210, 264, 246, 230, 189, 197, 174, 109, 51, 48,
  5, 273, 173, 313, 232, 255, 179, 230, 161, 87, 59, 63, NA, 378,
  313, 285, 252, 242, 218, 257, 193, 99, 74, 58, NA, 293, 300,
  264, 300, 228, 196, 151, 182, 123, 48, 47, NA, 330, 334, 306,
  331, 219, 189, 164, 174, 107, 46, 31, NA, 243, 254, 247, 282,
  248, 217, 175, 109, 96, 34, 27, NA, 219, 284, 245, 258, 230,
  221, 154, 159, 84, 47, 40, NA, 209, 270, 302, 260, 207, 187,
  187, 144, 97, 39, 28, NA, 191, 300, 204, 260, 221, 186, 195,
  107, 68, 35, 41, NA, 241, 253, 251, 229, 280, 295, 150, 98, 73,
  70, 30, NA, 181, 300, 261, 232, 228, 197, 176, 82, 53, 56, 27,
  NA, 141, 194, 176, 194, 177, 142, 176, 84, 20, 41, 36, NA), .Dim = c(12L,
  12L), .Dimnames = list(c("1998", "1999", "2000", "2001", "2002",
  "2003", "2004", "2005", "2006", "2007", "2008", "2009"), c("Jan",
  "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct",
  "Nov", "Dec")))

## Third series, tagged "r" below (unlabelled in the original script;
## presumably R-help traffic).
## 13 x 12 matrix filled column-wise: rows are years 1997-2009, columns months.
r <- structure(c(NA, 135, 226, 205, 558, 884, 1017, 1116, 1746,
  2075, 1714, 2490, 462, NA, 79, 145, 355, 583, 697, 1137, 1580, 1724,
  1920, 1907, 2583, NA, NA, 114, 195, 377, 651, 880, 1203, 1946,
  1703, 2270, 2191, 2740, NA, 92, 101, 189, 377, 470, 965, 1488,
  1657, 2057, 1818, 2145, 2487, NA, 36, 90, 161, 504, 552, 1057,
  1268, 1561, 1887, 2029, 2210, 2517, NA, 47, 105, 186, 418, 550,
  926, 1319, 1714, 2056, 1811, 2307, 2774, NA, 41, 110, 184, 293,
  615, 918, 1344, 1618, 1872, 1785, 2138, 3268, NA, 37, 64, 148,
  356, 562, 824, 1210, 1493, 1777, 1898, 2241, 2813, NA, 40, 94,
  203, 434, 678, 705, 1443, 1534, 1709, 1902, 2028, 2990, NA, 76,
  96, 231, 418, 657, 1055, 1567, 1712, 1810, 2328, 2708, 3037,
  NA, 61, 184, 318, 433, 825, 1038, 1605, 1895, 1907, 2127, 2594,
  2730, NA, 57, 105, 221, 422, 530, 742, 1158, 1481, 1508, 1450,
  2028, 2399, NA), .Dim = c(13L, 12L), .Dimnames = list(c("1997",
  "1998", "1999", "2000", "2001", "2002", "2003", "2004", "2005",
  "2006", "2007", "2008", "2009"), c("Jan", "Feb", "Mar", "Apr",
  "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")))

# Long form: one row per (year, month) cell; NA cells are dropped.
# `sas` is a data frame, so it is converted to a matrix first so that all
# three tables go through the same matrix melt.
sas <- melt(as.matrix(sas), na.rm = TRUE)
r <- melt(r, na.rm = TRUE)
s <- melt(s, na.rm = TRUE)
names(r) <- names(s) <- names(sas) <- c("year", "month", "count")

sas$software <- "sas"
s$software <- "s"
r$software <- "r"
all <- rbind(sas, s, r)

# Place every observation on the 15th of its month. The month abbreviation is
# translated to a month number through `month.abb` instead of being parsed
# with "%b": "%b" is matched against the current locale's month names, so the
# English abbreviations used here would give NA dates under a non-English
# LC_TIME.
all$date <- with(all, as.Date(
  paste(year, match(as.character(month), month.abb), 15, sep = "-"),
  format = "%Y-%m-%d"
))

# Monthly series per software with a smoother on top, linear scale first ...
qplot(date, count, data = all, geom = "line", colour = software) +
  geom_smooth(se = FALSE, size = 1)
# ... then the same plot on a log10 y axis.
last_plot() + scale_y_log10(breaks = 10^(1:3), labels = 10^(1:3))

# Yearly totals per software. Base aggregate() is used so that the script does
# not rely on plyr (ddply) being attached as a side effect of another package.
yearly <- aggregate(count ~ year + software, data = all, FUN = sum)
qplot(year, count, data = yearly, geom = "line", colour = software)
Hadley,

You might want to remove the 2009 data from each of the three lists, given that the January data is not yet complete. The result of including the January 2009 data in your plots is that the growth trajectories of the smoothed curves for SAS-L and R-Help appear to be leveling off or even declining when, at least for R-Help, that is not the case. The S-News curve is not affected significantly, given the already declining counts. The effect of the 2009 data is most noticeable in the log scale plot.

Thus:

all <- subset(all, year < 2009)

# Linear scale
qplot(date, count, data = all, geom = "line", colour = software) + geom_smooth(se = F, size = 1)

# Log scale
last_plot() + scale_y_log10(breaks = 10^(1:3), labels = 10^(1:3))

HTH,

Marc Schwartz

______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.