Hello all, I have a very large data frame (more than 5 million lines) as below (dput example at the end of mail):
Station Antenna Tag DateTime Power Events 1 2 999 22/07/2013 11:00:21 17 1 1 2 999 22/07/2013 11:33:47 31 1 1 2 999 22/07/2013 11:34:00 19 1 1 2 999 22/07/2013 11:34:16 53 1 1 2 999 22/07/2013 11:43:20 15 1 1 2 999 22/07/2013 11:43:35 17 1 To each Tag, in each Antenna, in each Station, I need to create a 10 min interval and sum the number of Events and mean of Power in the time interval, as below (complete wanted output at the end of mail). Station Antenna Tag StartDateTime EndDateTime Power Events 1 2 999 22/07/2013 11:00:21 22/07/2013 11:00:21 17 1 1 2 999 22/07/2013 11:34:16 22/07/2013 11:43:35 27 5 1 2 999 22/07/2013 11:44:35 22/07/2013 11:45:40 17 14 2 1 1 25/07/2013 14:19:45 25/07/2013 14:20:39 65 4 2 1 2 25/07/2013 14:20:13 25/07/2013 14:25:14 21 3 2 1 4 25/07/2013 14:20:46 25/07/2013 14:20:46 28 1 Show start and end points of each interval is optional, not necessary. I put both to show the irregular time interval: look to Tag 999: first interval are between 11:00 and 11:10, second between 11:34 and 11:44 and third are between 11:44 and 11:45. First I tried a for-loop, without success. After that, I tried this code: require (plyr) ddply (data, .(Station, Antenna, Tag, cut(data$DateTime, "10 min")), summarise, Power = round (mean(Power), 0), Events = sum (Events)) Is almost what I want, because cut() divided in regular time intervals, but in some cases I do not have this, and it split a unique observation in two. Any ideas to solve this issue? R version 3.0.1 (2013-05-16) -- "Good Sport" Platform: x86_64-w64-mingw32/x64 (64-bit) Windows 7 Professional Thanks in advanced, Raoni -- Raoni Rosa Rodrigues Research Associate of Fish Transposition Center CTPeixes Universidade Federal de Minas Gerais - UFMG Brasil rodrigues.ra...@gmail.com ##############################complete data dput structure(list(Station = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Antenna = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Tag = c(999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 999L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 18L, 18L, 18L, 21L, 22L, 36L, 36L, 36L, 36L, 36L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L, 48L), DateTime = structure(c(3L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 18L, 19L, 19L, 19L, 19L, 20L, 23L, 19L, 17L, 17L, 17L, 23L, 18L, 1L, 1L, 1L, 2L, 2L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 18L, 19L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 27L, 27L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L), .Label = c("19/06/2013 22:15", "19/06/2013 22:16", "22/07/2013 11:00", "22/07/2013 11:33", "22/07/2013 11:34", "22/07/2013 11:43", "22/07/2013 11:44", "22/07/2013 11:45", "25/07/2013 14:10", "25/07/2013 14:11", "25/07/2013 14:12", "25/07/2013 14:13", "25/07/2013 14:14", "25/07/2013 14:15", "25/07/2013 14:16", "25/07/2013 14:17", "25/07/2013 14:18", "25/07/2013 14:19", "25/07/2013 14:20", "25/07/2013 14:21", "25/07/2013 14:23", "25/07/2013 14:24", "25/07/2013 14:25", "25/07/2013 14:26", "25/07/2013 14:27", "25/07/2013 14:28", "25/07/2013 14:29", "25/07/2013 14:30"), class = "factor"), Power = c(17L, 31L, 19L, 53L, 15L, 17L, 21L, 12L, 15L, 22L, 19L, 15L, 13L, 14L, 15L, 12L, 23L, 19L, 16L, 20L, 30L, 37L, 25L, 167L, 24L, 14L, 24L, 28L, 31L, 48L, 158L, 18L, 25L, 102L, 101L, 110L, 90L, 58L, 202L, 192L, 179L, 159L, 155L, 184L, 189L, 64L, 203L, 231L, 207L, 171L, 196L, 169L, 169L, 216L, 202L, 242L, 175L, 215L, 156L, 114L, 232L, 210L, 208L, 119L, 206L, 188L, 215L, 210L, 171L, 187L, 189L, 212L, 211L, 206L, 174L, 194L, 160L, 26L, 204L, 198L, 207L, 90L, 162L, 131L, 168L, 179L, 227L, 198L, 131L, 149L, 205L, 209L, 86L, 84L, 175L, 176L, 203L, 192L, 139L, 193L, 197L, 220L, 208L, 155L, 125L, 217L, 114L, 167L, 108L, 208L, 178L, 207L, 210L, 215L, 170L, 200L, 219L, 246L), Events = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("Station", "Antenna", "Tag", "DateTime", "Power", "Events"), class = "data.frame", row.names = c(NA, -118L)) ########################complete desired result dput structure(list(Station = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Antenna = c(2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Tag = c(999L, 999L, 999L, 1L, 2L, 4L, 18L, 21L, 22L, 36L, 48L, 48L), StartDateTime = structure(c(2L, 3L, 4L, 8L, 9L, 10L, 6L, 12L, 7L, 1L, 5L, 11L), .Label = c("19/06/2013 22:15:49", "22/07/2013 11:00:21", "22/07/2013 11:34:16", "22/07/2013 11:44:35", "25/07/2013 14:10:44", "25/07/2013 14:18:15", "25/07/2013 14:19:44", "25/07/2013 14:19:45", "25/07/2013 14:20:13", "25/07/2013 14:20:46", "25/07/2013 14:23:24", "25/07/2013 14:25:49"), class = "factor"), EndDateTime = structure(c(2L, 3L, 4L, 8L, 10L, 9L, 5L, 11L, 6L, 1L, 7L, 12L), .Label = c("19/06/2013 22:16:29", "22/07/2013 11:00:21", "22/07/2013 11:43:35", "22/07/2013 11:45:40", "25/07/2013 14:18:42", "25/07/2013 14:19:44", "25/07/2013 14:20:06", "25/07/2013 14:20:39", "25/07/2013 14:20:46", "25/07/2013 14:25:14", "25/07/2013 14:25:49", "25/07/2013 14:30:59"), class = "factor"), Power = c(17L, 27L, 17L, 65L, 21L, 28L, 79L, 18L, 25L, 92L, 183L, 177L), Events = c(1L, 5L, 14L, 4L, 3L, 1L, 3L, 1L, 1L, 5L, 40L, 40L)), .Names = c("Station", "Antenna", "Tag", "StartDateTime", "EndDateTime", "Power", "Events"), class = "data.frame", row.names = c(NA, -12L)) -- Raoni Rosa Rodrigues Research Associate of Fish Transposition Center CTPeixes Universidade Federal de Minas Gerais - UFMG Brasil rodrigues.ra...@gmail.com [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.