This works. First, we assign the results of dput() to a variable so we can use it. Then we eliminate the groups we don't need. Third, we remake the factors to eliminate the groups and genes that do not appear in the data subset. Finally, we compute the tests.
Dta <- structure(list(Gene = structure(c(1L, 12L, 19L, 20L, 21L, ....................lines omitted.......................... "Group", "A", "B", "C", "D", "E", "F", "G", "H"), row.names = c(NA, 25L), class = "data.frame") # Pull out just groups 5 and 6 Dtb <- Dta[Dta$Group %in% c(5, 6), ] # Check the resulting data frame - 8 observations, # four in each group, all measurements in B are 1 Dtb # Eliminate factor levels that do not exist in the reduced # data set Dtb$Gene <- factor(Dtb$Gene) Dtb$Group <- factor(Dtb$Group) # Mann-Whitney is the same as Wilcoxon Rank Sum test (see manual page) ?wilcox.test # Compute test for A wilcox.test(A~Group, Dtb) # Compute all the tests apply(Dtb[,3:10], 2, function(x) wilcox.test(x~Dtb$Group)) # Error relates to column B which is constant ---------------------------------------------- David L Carlson Associate Professor of Anthropology Texas A&M University College Station, TX 77843-4352 > -----Original Message----- > From: r-help-boun...@r-project.org [mailto:r-help-bounces@r- > project.org] On Behalf Of Oxenstierna > Sent: Friday, July 06, 2012 3:34 PM > To: r-help@r-project.org > Subject: Re: [R] Mann-Whitney by group > > Hi David, > > Thank you for the insight: I could have sworn I added a picture of the > data, but providing the actual data is worlds easier to deal with, I'm > sure. > I've never used dput(), so I entered it using the dataframe in question > as > the object, and I've pasted the results below. > > Essentially, I would like to run the two-sample independent test, > comparing > Group 5 (CD8.14 through CD8.17) to Group 6 (CD8.18 through CD8.21). So, > for > A, test Group 5 agains Group 6, for B, test Group 5 against Group 6, > and so > on. I'm not going to muddy the waters by telling you what I've tried; > suffice it to say that I'm looking for insights into how to structure R > commands to compare groups of data of this format. 
> > Many thanks in advance, > > David > > > structure(list(Gene = structure(c(1L, 12L, 19L, 20L, 21L, 22L, > 23L, 24L, 25L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, > 14L, 15L, 16L, 17L, 18L), .Label = c("CD8.1", "CD8.10", "CD8.11", > "CD8.12", "CD8.13", "CD8.14", "CD8.15", "CD8.16", "CD8.17", "CD8.18", > "CD8.19", "CD8.2", "CD8.20", "CD8.21", "CD8.22", "CD8.23", "CD8.24", > "CD8.25", "CD8.3", "CD8.4", "CD8.5", "CD8.6", "CD8.7", "CD8.8", > "CD8.9"), class = "factor"), Group = structure(c(8L, 8L, 8L, > 9L, 9L, 9L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, > 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L), .Label = c("Fabbf Ova > CD40", > "Fabbf Ova MHC2", "Fabbf Ova WT", "Fabbf WT", "Naïve CD40", "Naïve MHC > II", > "Naïve WT", "1", "2", "3", "4", "5", "6", "7"), class = "factor"), > A = c(19.4701946749544, 0.679440926463348, 0.69035683372563, > 0.347105466158261, 0.435480792190284, 0.338699910286907, > 0.651378057031152, 0.707065053752258, 0.685244609506316, > 0.816673858871597, 0.597009097584509, 0.592331304482431, > 0.709359033358704, 0.628406759227531, 0.78158729467231, > 1.0422377526669, > 0.61560003251142, 0.463755016733183, 0.419700860701392, > 0.380946898502731, > 0.41156961153081, 0.471790683365624, 0.552773224145722, > 0.466787799928649, > 0.767624372542755), B = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, > 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), C = c(0.914649979331863, > 1.443086801592, 0.928280641141244, 0.467498974059775, > 0.668742025741347, > 0.568417298005388, 0.778907650835673, 0.778765181169635, > 1.01103488277517, 0.674133483128923, 0.830400022230133, > 0.652687178870627, > 0.746893950266518, 0.765498308522646, 1.01273201749333, > 1.00309616129672, > 0.63015975419947, 0.615905247119739, 0.615687625199691, > 0.503136087800137, > 0.566164026974035, 0.631519467967541, 0.822126705285366, > 0.543873075815645, 1.00596108625425), D = c(13.3272657341526, > 3.08914950309865, 1.71836820240434, 0.723301573710509, > 1.21014411624732, > 
1.92899377364865, 1.80280408189187, 2.25057819266424, > 2.23876060313374, > 1.30849425313072, 1.58782967140617, 1.19199809794126, > 1.64151140806787, > 0.241017500596534, 0.364896032519483, 0.322953808735804, > 0.2052110581509, 0.927601295331376, 0.808910781520832, > 0.538033121081646, > 0.655348783504307, 0.564449549672088, 0.521729926793001, > 0.414305517285192, 0.507084483980948), E = c(56.2830291897158, > 9.76091939190267, 4.80922410182105, 17.0056576949022, > 20.851046177766, > 17.9057247086369, 5.93332779160845, 4.73058157592946, > 5.59155211460608, > 9.67484467290805, 5.92374864612388, 7.12393623733123, > 5.33576126730867, > 10.3943422629275, 10.8732527705049, 12.4861085370674, > 12.0918705721064, > 13.3210661695018, 10.9410344557684, 15.1298307761675, > 13.0708078246191, > 9.4445293976312, 6.94340249514349, 5.07888688780375, > 8.33846787814466 > ), F = c(15.0459568981729, 21.6362955612539, 9.66673955488981, > 27.2276698483913, 18.1090094072926, 20.0952712980862, > 24.9249499974856, > 23.5540183530194, 29.6638363657906, 28.9779309040733, > 42.0402820641407, > 33.8068160394092, 51.7299064374737, 37.8306751403421, > 43.1955470199259, > 45.5125262939585, 40.3109474523637, 23.6341894633273, > 23.9721353180788, > 20.4920649252818, 24.8898447627354, 34.2686409607416, > 31.3815198841165, > 31.4947528368753, 43.2686436885025), G = c(15.9703031418086, > 5.30495997585743, 3.07594974529074, 8.28703732907722, > 10.9437825143868, > 6.91196232523896, 2.97808148581742, 2.6386825521864, > 2.2415006913088, > 5.00747306438661, 2.65238188782831, 3.18277515130905, > 3.14638620532385, > 2.5149505923191, 2.48862112414046, 2.97170069886913, > 1.91643165326171, > 7.50682774199005, 5.39102206185423, 5.2498453524987, > 7.17519969844757, > 2.66448841457179, 2.78444235996995, 1.62286520735228, > 2.48760726398266 > ), H = c(-1, -1, -1, 0.0036561124481055, 0.010723007432761, > 0.0196616746380801, 0.0371046164124276, -1, 3.27378673314144e-05, > -1, -1, -1, -1, 0.00816215783906802, 
21.5424904701651, > 9.57900616157724, > 0.00735942043489242, 1.50346040901698, 0.0909450037365435, > 1.49237001404701e-05, 0.000775741472561218, -1, 17.9023582944659, > 0.0176891314806093, 33.4326253626981)), .Names = c("Gene", > "Group", "A", "B", "C", "D", "E", "F", "G", "H"), row.names = c(NA, > 25L), class = "data.frame") > > -- > View this message in context: http://r.789695.n4.nabble.com/Mann- > Whitney-by-group-tp4635618p4635667.html > Sent from the R help mailing list archive at Nabble.com. > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting- > guide.html > and provide commented, minimal, self-contained, reproducible code. ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.